# Segmentation Example
> Train a U-Net for pixelwise segmentation of the prostate

In [1]:
import monai
import ignite

from prostate158.utils import load_config
from prostate158.train import SegmentationTrainer
from prostate158.report import ReportGenerator
from prostate158.viewer import ListViewer

### Installing dependencies

In [2]:
# Optional one-time environment setup: uncomment only the lines you need.
# `%pip` is used instead of `!pip` so packages land in the running kernel's environment.

# If you have a requirements.txt:
# %pip install --upgrade pip
# %pip install -r requirements.txt

# Otherwise install the core libraries directly.
# NOTE(review): PyTorch-Ignite is published on PyPI as `pytorch-ignite`; the bare
# `ignite` name is presumably a different project -- confirm before installing.
# %pip install "monai[all]" pytorch-ignite matplotlib pyyaml munch

# Finally, PyTorch from the cu118 wheel index, plus the remaining extras:
# %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# %pip install opencv-python
# %pip install ipywidgets

In [2]:
import torch

# Sanity check: prints True when PyTorch can use a CUDA device, False otherwise.
print(torch.cuda.is_available())

True


All parameters needed for training and evaluation are set in the `anatomy.yaml` file. The cell below loads it and then overrides the model type.

In [3]:
# Load the experiment configuration. This run uses the anatomy config;
# to use the tumor config instead, swap in the commented-out line below.
cfg = load_config("anatomy.yaml")
# cfg = load_config("tumor.yaml")
# Seed MONAI's determinism helper with the seed stored in the config.
monai.utils.set_determinism(seed=cfg.seed)
# Override the model type from the YAML: this run selects "rrunet3d";
# the commented-out line below selects "unet" instead.
cfg.model.type = "rrunet3d"
# cfg.model.type = "unet"

In [4]:
from prostate158.model import get_model

# Build the network selected by `cfg`, then place it on the configured device
# and print its layer structure.
# NOTE(review): presumably for inspection only -- the `[get_model]` log line shows up
# again when the trainer is constructed, so the trainer appears to build its own
# network; confirm.
model = get_model(cfg)
model = model.to(cfg.device)
print(model)

[get_model] model.type = 'rrunet3d'
RRUNet3D(
  (pool): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (unpool): Upsample(scale_factor=2.0, mode='nearest')
  (input_conv): Conv3d(1, 16, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
  (output_conv): Conv3d(16, 3, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
  (output_activation): Softmax(dim=1)
  (encoders): ModuleList(
    (0): ResidualBlock(
      (CONV): ConvBlock(
        (ops): ModuleList(
          (0): Conv3d(16, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
          (1): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
      )
      (RCNN): Sequential(
        (0): RecurrentBlock(
          (conv): Sequential(
            (0): Conv3d(16, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
            (1): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_s

Create a supervised trainer for the segmentation task.

In [5]:
# Build the trainer from the loaded config, enabling the progress bar, early
# stopping, and the three listed evaluation metrics.
trainer = SegmentationTrainer(
    progress_bar=True,
    early_stopping=True,
    metrics=["MeanDice", "HausdorffDistance", "SurfaceDistance"],
    save_latest_metrics=True,
    config=cfg,
)

# Load weights from a previously saved anatomy checkpoint (not a tumor checkpoint).
# NOTE(review): the path is relative to the working directory and the file name is
# specific to one earlier run -- confirm the file exists before executing this cell.
trainer.load_checkpoint("models/network_anatomy_9_key_metric_205_epoch=0.6790.pt")

[get_model] model.type = 'rrunet3d'
Loading checkpoint from models/network_anatomy_9_key_metric_205_epoch=0.6790.pt to device cuda:0


Add a learning rate scheduler that follows the one-cycle policy.

In [6]:
# Set up the one-cycle learning-rate schedule on the trainer before training starts.
trainer.fit_one_cycle()

Let's train. This can take several hours. 

In [None]:
# Launch training. The recorded log shows 119 iterations per epoch and an epoch limit of 500.
# NOTE(review): in the visible (truncated) log, train_loss stays around 1.3-1.4 through
# epoch 27 -- worth confirming that the loss and learning-rate setup behave as intended.
trainer.run()

`torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
`torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.


2025-08-11 18:32:27,145 - INFO - Epoch: 1/500, Iter: 1/119 -- train_loss: 1.2907 


[1/119]   1%|           [00:00<?]

2025-08-11 18:32:27,397 - INFO - Epoch: 1/500, Iter: 2/119 -- train_loss: 1.4039 
2025-08-11 18:32:27,645 - INFO - Epoch: 1/500, Iter: 3/119 -- train_loss: 1.5261 
2025-08-11 18:32:27,878 - INFO - Epoch: 1/500, Iter: 4/119 -- train_loss: 1.3336 
2025-08-11 18:32:28,095 - INFO - Epoch: 1/500, Iter: 5/119 -- train_loss: 1.2843 
2025-08-11 18:32:30,154 - INFO - Epoch: 1/500, Iter: 6/119 -- train_loss: 1.4816 
2025-08-11 18:32:30,410 - INFO - Epoch: 1/500, Iter: 7/119 -- train_loss: 1.4172 
2025-08-11 18:32:30,628 - INFO - Epoch: 1/500, Iter: 8/119 -- train_loss: 1.3695 
2025-08-11 18:32:30,855 - INFO - Epoch: 1/500, Iter: 9/119 -- train_loss: 1.4262 
2025-08-11 18:32:31,078 - INFO - Epoch: 1/500, Iter: 10/119 -- train_loss: 1.4341 
2025-08-11 18:32:31,304 - INFO - Epoch: 1/500, Iter: 11/119 -- train_loss: 1.3810 
2025-08-11 18:32:31,528 - INFO - Epoch: 1/500, Iter: 12/119 -- train_loss: 1.3260 
2025-08-11 18:32:31,778 - INFO - Epoch: 1/500, Iter: 13/119 -- train_loss: 1.3980 
2025-08-11 1

`torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.


[1/20]   5%|5          [00:00<?]

2025-08-11 18:35:20,367 - INFO - Epoch: 2/500, Iter: 1/119 -- train_loss: 1.2916 


[1/119]   1%|           [00:00<?]

2025-08-11 18:35:20,601 - INFO - Epoch: 2/500, Iter: 2/119 -- train_loss: 1.3864 
2025-08-11 18:35:20,850 - INFO - Epoch: 2/500, Iter: 3/119 -- train_loss: 1.3946 
2025-08-11 18:35:21,050 - INFO - Epoch: 2/500, Iter: 4/119 -- train_loss: 1.4025 
2025-08-11 18:35:21,289 - INFO - Epoch: 2/500, Iter: 5/119 -- train_loss: 1.3367 
2025-08-11 18:35:21,513 - INFO - Epoch: 2/500, Iter: 6/119 -- train_loss: 1.3694 
2025-08-11 18:35:21,768 - INFO - Epoch: 2/500, Iter: 7/119 -- train_loss: 1.3580 
2025-08-11 18:35:22,035 - INFO - Epoch: 2/500, Iter: 8/119 -- train_loss: 1.5208 
2025-08-11 18:35:22,346 - INFO - Epoch: 2/500, Iter: 9/119 -- train_loss: 1.3754 
2025-08-11 18:35:22,611 - INFO - Epoch: 2/500, Iter: 10/119 -- train_loss: 1.3098 
2025-08-11 18:35:23,504 - INFO - Epoch: 2/500, Iter: 11/119 -- train_loss: 1.5869 
2025-08-11 18:35:23,728 - INFO - Epoch: 2/500, Iter: 12/119 -- train_loss: 1.4470 
2025-08-11 18:35:23,975 - INFO - Epoch: 2/500, Iter: 13/119 -- train_loss: 1.4259 
2025-08-11 1

[1/20]   5%|5          [00:00<?]

2025-08-11 18:38:03,555 - INFO - Epoch: 3/500, Iter: 1/119 -- train_loss: 1.2812 


[1/119]   1%|           [00:00<?]

2025-08-11 18:38:05,308 - INFO - Epoch: 3/500, Iter: 2/119 -- train_loss: 1.3945 
2025-08-11 18:38:05,540 - INFO - Epoch: 3/500, Iter: 3/119 -- train_loss: 1.4067 
2025-08-11 18:38:05,761 - INFO - Epoch: 3/500, Iter: 4/119 -- train_loss: 1.3444 
2025-08-11 18:38:05,962 - INFO - Epoch: 3/500, Iter: 5/119 -- train_loss: 1.3574 
2025-08-11 18:38:06,228 - INFO - Epoch: 3/500, Iter: 6/119 -- train_loss: 1.4507 
2025-08-11 18:38:06,468 - INFO - Epoch: 3/500, Iter: 7/119 -- train_loss: 1.3773 
2025-08-11 18:38:06,724 - INFO - Epoch: 3/500, Iter: 8/119 -- train_loss: 1.4411 
2025-08-11 18:38:06,992 - INFO - Epoch: 3/500, Iter: 9/119 -- train_loss: 1.2837 
2025-08-11 18:38:09,093 - INFO - Epoch: 3/500, Iter: 10/119 -- train_loss: 1.3583 
2025-08-11 18:38:09,340 - INFO - Epoch: 3/500, Iter: 11/119 -- train_loss: 1.3493 
2025-08-11 18:38:09,551 - INFO - Epoch: 3/500, Iter: 12/119 -- train_loss: 1.4470 
2025-08-11 18:38:09,767 - INFO - Epoch: 3/500, Iter: 13/119 -- train_loss: 1.3317 
2025-08-11 1

[1/20]   5%|5          [00:00<?]

2025-08-11 18:40:46,010 - INFO - Epoch: 4/500, Iter: 1/119 -- train_loss: 1.2874 


[1/119]   1%|           [00:00<?]

2025-08-11 18:40:46,268 - INFO - Epoch: 4/500, Iter: 2/119 -- train_loss: 1.5446 
2025-08-11 18:40:46,510 - INFO - Epoch: 4/500, Iter: 3/119 -- train_loss: 1.2809 
2025-08-11 18:40:46,760 - INFO - Epoch: 4/500, Iter: 4/119 -- train_loss: 1.2713 
2025-08-11 18:40:46,991 - INFO - Epoch: 4/500, Iter: 5/119 -- train_loss: 1.2988 
2025-08-11 18:40:47,252 - INFO - Epoch: 4/500, Iter: 6/119 -- train_loss: 1.2912 
2025-08-11 18:40:47,514 - INFO - Epoch: 4/500, Iter: 7/119 -- train_loss: 1.3223 
2025-08-11 18:40:47,743 - INFO - Epoch: 4/500, Iter: 8/119 -- train_loss: 1.3703 
2025-08-11 18:40:51,267 - INFO - Epoch: 4/500, Iter: 9/119 -- train_loss: 1.4510 
2025-08-11 18:40:51,487 - INFO - Epoch: 4/500, Iter: 10/119 -- train_loss: 1.3089 
2025-08-11 18:40:51,728 - INFO - Epoch: 4/500, Iter: 11/119 -- train_loss: 1.3581 
2025-08-11 18:40:51,958 - INFO - Epoch: 4/500, Iter: 12/119 -- train_loss: 1.3331 
2025-08-11 18:40:52,174 - INFO - Epoch: 4/500, Iter: 13/119 -- train_loss: 1.4895 
2025-08-11 1

[1/20]   5%|5          [00:00<?]

2025-08-11 18:43:38,438 - INFO - Epoch: 5/500, Iter: 1/119 -- train_loss: 1.3283 


[1/119]   1%|           [00:00<?]

2025-08-11 18:43:38,703 - INFO - Epoch: 5/500, Iter: 2/119 -- train_loss: 1.4150 
2025-08-11 18:43:40,497 - INFO - Epoch: 5/500, Iter: 3/119 -- train_loss: 1.4006 
2025-08-11 18:43:40,781 - INFO - Epoch: 5/500, Iter: 4/119 -- train_loss: 1.4222 
2025-08-11 18:43:40,988 - INFO - Epoch: 5/500, Iter: 5/119 -- train_loss: 1.4262 
2025-08-11 18:43:41,222 - INFO - Epoch: 5/500, Iter: 6/119 -- train_loss: 1.3437 
2025-08-11 18:43:41,487 - INFO - Epoch: 5/500, Iter: 7/119 -- train_loss: 1.3100 
2025-08-11 18:43:41,727 - INFO - Epoch: 5/500, Iter: 8/119 -- train_loss: 1.3810 
2025-08-11 18:43:41,976 - INFO - Epoch: 5/500, Iter: 9/119 -- train_loss: 1.4936 
2025-08-11 18:43:42,237 - INFO - Epoch: 5/500, Iter: 10/119 -- train_loss: 1.4866 
2025-08-11 18:43:43,797 - INFO - Epoch: 5/500, Iter: 11/119 -- train_loss: 1.4079 
2025-08-11 18:43:44,050 - INFO - Epoch: 5/500, Iter: 12/119 -- train_loss: 1.3411 
2025-08-11 18:43:44,305 - INFO - Epoch: 5/500, Iter: 13/119 -- train_loss: 1.3453 
2025-08-11 1

[1/20]   5%|5          [00:00<?]

2025-08-11 18:46:24,019 - INFO - Epoch: 6/500, Iter: 1/119 -- train_loss: 1.4179 


[1/119]   1%|           [00:00<?]

2025-08-11 18:46:26,474 - INFO - Epoch: 6/500, Iter: 2/119 -- train_loss: 1.3984 
2025-08-11 18:46:26,748 - INFO - Epoch: 6/500, Iter: 3/119 -- train_loss: 1.4094 
2025-08-11 18:46:27,539 - INFO - Epoch: 6/500, Iter: 4/119 -- train_loss: 1.3482 
2025-08-11 18:46:27,778 - INFO - Epoch: 6/500, Iter: 5/119 -- train_loss: 1.3268 
2025-08-11 18:46:28,029 - INFO - Epoch: 6/500, Iter: 6/119 -- train_loss: 1.4519 
2025-08-11 18:46:28,245 - INFO - Epoch: 6/500, Iter: 7/119 -- train_loss: 1.3080 
2025-08-11 18:46:28,479 - INFO - Epoch: 6/500, Iter: 8/119 -- train_loss: 1.2971 
2025-08-11 18:46:28,740 - INFO - Epoch: 6/500, Iter: 9/119 -- train_loss: 1.3088 
2025-08-11 18:46:29,003 - INFO - Epoch: 6/500, Iter: 10/119 -- train_loss: 1.2743 
2025-08-11 18:46:29,249 - INFO - Epoch: 6/500, Iter: 11/119 -- train_loss: 1.3361 
2025-08-11 18:46:29,928 - INFO - Epoch: 6/500, Iter: 12/119 -- train_loss: 1.3773 
2025-08-11 18:46:30,204 - INFO - Epoch: 6/500, Iter: 13/119 -- train_loss: 1.3768 
2025-08-11 1

[1/20]   5%|5          [00:00<?]

2025-08-11 18:49:11,531 - INFO - Epoch: 7/500, Iter: 1/119 -- train_loss: 1.4343 


[1/119]   1%|           [00:00<?]

2025-08-11 18:49:11,770 - INFO - Epoch: 7/500, Iter: 2/119 -- train_loss: 1.2912 
2025-08-11 18:49:12,003 - INFO - Epoch: 7/500, Iter: 3/119 -- train_loss: 1.4846 
2025-08-11 18:49:12,223 - INFO - Epoch: 7/500, Iter: 4/119 -- train_loss: 1.3076 
2025-08-11 18:49:12,489 - INFO - Epoch: 7/500, Iter: 5/119 -- train_loss: 1.3236 
2025-08-11 18:49:13,805 - INFO - Epoch: 7/500, Iter: 6/119 -- train_loss: 1.4390 
2025-08-11 18:49:14,052 - INFO - Epoch: 7/500, Iter: 7/119 -- train_loss: 1.4302 
2025-08-11 18:49:14,296 - INFO - Epoch: 7/500, Iter: 8/119 -- train_loss: 1.2949 
2025-08-11 18:49:15,711 - INFO - Epoch: 7/500, Iter: 9/119 -- train_loss: 1.3189 
2025-08-11 18:49:15,960 - INFO - Epoch: 7/500, Iter: 10/119 -- train_loss: 1.2268 
2025-08-11 18:49:16,219 - INFO - Epoch: 7/500, Iter: 11/119 -- train_loss: 1.3049 
2025-08-11 18:49:16,482 - INFO - Epoch: 7/500, Iter: 12/119 -- train_loss: 1.3548 
2025-08-11 18:49:16,756 - INFO - Epoch: 7/500, Iter: 13/119 -- train_loss: 1.3687 
2025-08-11 1

[1/20]   5%|5          [00:00<?]

the prediction of class 1 is all 0, this may result in nan/inf distance.


2025-08-11 18:52:01,876 - INFO - Epoch: 8/500, Iter: 1/119 -- train_loss: 1.3084 


[1/119]   1%|           [00:00<?]

2025-08-11 18:52:02,148 - INFO - Epoch: 8/500, Iter: 2/119 -- train_loss: 1.3313 
2025-08-11 18:52:02,409 - INFO - Epoch: 8/500, Iter: 3/119 -- train_loss: 1.3681 
2025-08-11 18:52:03,160 - INFO - Epoch: 8/500, Iter: 4/119 -- train_loss: 1.3956 
2025-08-11 18:52:03,376 - INFO - Epoch: 8/500, Iter: 5/119 -- train_loss: 1.2525 
2025-08-11 18:52:03,612 - INFO - Epoch: 8/500, Iter: 6/119 -- train_loss: 1.2643 
2025-08-11 18:52:03,856 - INFO - Epoch: 8/500, Iter: 7/119 -- train_loss: 1.3413 
2025-08-11 18:52:04,080 - INFO - Epoch: 8/500, Iter: 8/119 -- train_loss: 1.4219 
2025-08-11 18:52:06,338 - INFO - Epoch: 8/500, Iter: 9/119 -- train_loss: 1.4082 
2025-08-11 18:52:07,446 - INFO - Epoch: 8/500, Iter: 10/119 -- train_loss: 1.3712 
2025-08-11 18:52:07,698 - INFO - Epoch: 8/500, Iter: 11/119 -- train_loss: 1.3741 
2025-08-11 18:52:07,942 - INFO - Epoch: 8/500, Iter: 12/119 -- train_loss: 1.3966 
2025-08-11 18:52:08,204 - INFO - Epoch: 8/500, Iter: 13/119 -- train_loss: 1.3614 
2025-08-11 1

[1/20]   5%|5          [00:00<?]

2025-08-11 18:54:54,201 - INFO - Epoch: 9/500, Iter: 1/119 -- train_loss: 1.3435 


[1/119]   1%|           [00:00<?]

2025-08-11 18:54:54,434 - INFO - Epoch: 9/500, Iter: 2/119 -- train_loss: 1.4049 
2025-08-11 18:54:54,683 - INFO - Epoch: 9/500, Iter: 3/119 -- train_loss: 1.3310 
2025-08-11 18:54:54,934 - INFO - Epoch: 9/500, Iter: 4/119 -- train_loss: 1.4524 
2025-08-11 18:54:55,171 - INFO - Epoch: 9/500, Iter: 5/119 -- train_loss: 1.2915 
2025-08-11 18:54:55,401 - INFO - Epoch: 9/500, Iter: 6/119 -- train_loss: 1.2620 
2025-08-11 18:54:55,826 - INFO - Epoch: 9/500, Iter: 7/119 -- train_loss: 1.4084 
2025-08-11 18:54:56,162 - INFO - Epoch: 9/500, Iter: 8/119 -- train_loss: 1.2706 
2025-08-11 18:54:57,601 - INFO - Epoch: 9/500, Iter: 9/119 -- train_loss: 1.2268 
2025-08-11 18:54:58,012 - INFO - Epoch: 9/500, Iter: 10/119 -- train_loss: 1.3581 
2025-08-11 18:54:58,250 - INFO - Epoch: 9/500, Iter: 11/119 -- train_loss: 1.2683 
2025-08-11 18:54:58,517 - INFO - Epoch: 9/500, Iter: 12/119 -- train_loss: 1.4310 
2025-08-11 18:54:58,783 - INFO - Epoch: 9/500, Iter: 13/119 -- train_loss: 1.3676 
2025-08-11 1

[1/20]   5%|5          [00:00<?]

2025-08-11 18:57:37,775 - INFO - Epoch: 10/500, Iter: 1/119 -- train_loss: 1.3259 


[1/119]   1%|           [00:00<?]

2025-08-11 18:57:38,024 - INFO - Epoch: 10/500, Iter: 2/119 -- train_loss: 1.4538 
2025-08-11 18:57:38,292 - INFO - Epoch: 10/500, Iter: 3/119 -- train_loss: 1.4003 
2025-08-11 18:57:38,524 - INFO - Epoch: 10/500, Iter: 4/119 -- train_loss: 1.3629 
2025-08-11 18:57:38,755 - INFO - Epoch: 10/500, Iter: 5/119 -- train_loss: 1.3169 
2025-08-11 18:57:38,975 - INFO - Epoch: 10/500, Iter: 6/119 -- train_loss: 1.3404 
2025-08-11 18:57:39,455 - INFO - Epoch: 10/500, Iter: 7/119 -- train_loss: 1.3163 
2025-08-11 18:57:39,700 - INFO - Epoch: 10/500, Iter: 8/119 -- train_loss: 1.3149 
2025-08-11 18:57:41,099 - INFO - Epoch: 10/500, Iter: 9/119 -- train_loss: 1.2639 
2025-08-11 18:57:41,832 - INFO - Epoch: 10/500, Iter: 10/119 -- train_loss: 1.3061 
2025-08-11 18:57:42,060 - INFO - Epoch: 10/500, Iter: 11/119 -- train_loss: 1.3374 
2025-08-11 18:57:43,634 - INFO - Epoch: 10/500, Iter: 12/119 -- train_loss: 1.3994 
2025-08-11 18:57:43,869 - INFO - Epoch: 10/500, Iter: 13/119 -- train_loss: 1.3818 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:00:54,057 - INFO - Epoch: 11/500, Iter: 1/119 -- train_loss: 1.3042 


[1/119]   1%|           [00:00<?]

2025-08-11 19:00:55,401 - INFO - Epoch: 11/500, Iter: 2/119 -- train_loss: 1.3204 
2025-08-11 19:00:55,776 - INFO - Epoch: 11/500, Iter: 3/119 -- train_loss: 1.3062 
2025-08-11 19:00:56,025 - INFO - Epoch: 11/500, Iter: 4/119 -- train_loss: 1.3276 
2025-08-11 19:00:56,282 - INFO - Epoch: 11/500, Iter: 5/119 -- train_loss: 1.3521 
2025-08-11 19:00:56,518 - INFO - Epoch: 11/500, Iter: 6/119 -- train_loss: 1.4429 
2025-08-11 19:00:56,754 - INFO - Epoch: 11/500, Iter: 7/119 -- train_loss: 1.3109 
2025-08-11 19:00:56,987 - INFO - Epoch: 11/500, Iter: 8/119 -- train_loss: 1.3443 
2025-08-11 19:00:57,452 - INFO - Epoch: 11/500, Iter: 9/119 -- train_loss: 1.3307 
2025-08-11 19:00:59,077 - INFO - Epoch: 11/500, Iter: 10/119 -- train_loss: 1.3715 
2025-08-11 19:00:59,449 - INFO - Epoch: 11/500, Iter: 11/119 -- train_loss: 1.3746 
2025-08-11 19:00:59,670 - INFO - Epoch: 11/500, Iter: 12/119 -- train_loss: 1.2570 
2025-08-11 19:00:59,919 - INFO - Epoch: 11/500, Iter: 13/119 -- train_loss: 1.2187 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:03:37,249 - INFO - Epoch: 12/500, Iter: 1/119 -- train_loss: 1.3073 


[1/119]   1%|           [00:00<?]

2025-08-11 19:03:37,494 - INFO - Epoch: 12/500, Iter: 2/119 -- train_loss: 1.3330 
2025-08-11 19:03:37,742 - INFO - Epoch: 12/500, Iter: 3/119 -- train_loss: 1.2925 
2025-08-11 19:03:38,022 - INFO - Epoch: 12/500, Iter: 4/119 -- train_loss: 1.4789 
2025-08-11 19:03:38,252 - INFO - Epoch: 12/500, Iter: 5/119 -- train_loss: 1.3449 
2025-08-11 19:03:38,468 - INFO - Epoch: 12/500, Iter: 6/119 -- train_loss: 1.3552 
2025-08-11 19:03:38,706 - INFO - Epoch: 12/500, Iter: 7/119 -- train_loss: 1.5286 
2025-08-11 19:03:38,947 - INFO - Epoch: 12/500, Iter: 8/119 -- train_loss: 1.3645 
2025-08-11 19:03:40,960 - INFO - Epoch: 12/500, Iter: 9/119 -- train_loss: 1.3452 
2025-08-11 19:03:41,200 - INFO - Epoch: 12/500, Iter: 10/119 -- train_loss: 1.3201 
2025-08-11 19:03:41,420 - INFO - Epoch: 12/500, Iter: 11/119 -- train_loss: 1.4066 
2025-08-11 19:03:41,653 - INFO - Epoch: 12/500, Iter: 12/119 -- train_loss: 1.4056 
2025-08-11 19:03:41,871 - INFO - Epoch: 12/500, Iter: 13/119 -- train_loss: 1.4470 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:06:27,967 - INFO - Epoch: 13/500, Iter: 1/119 -- train_loss: 1.3571 


[1/119]   1%|           [00:00<?]

2025-08-11 19:06:28,232 - INFO - Epoch: 13/500, Iter: 2/119 -- train_loss: 1.3654 
2025-08-11 19:06:29,441 - INFO - Epoch: 13/500, Iter: 3/119 -- train_loss: 1.2970 
2025-08-11 19:06:29,656 - INFO - Epoch: 13/500, Iter: 4/119 -- train_loss: 1.3658 
2025-08-11 19:06:29,894 - INFO - Epoch: 13/500, Iter: 5/119 -- train_loss: 1.4099 
2025-08-11 19:06:30,117 - INFO - Epoch: 13/500, Iter: 6/119 -- train_loss: 1.4314 
2025-08-11 19:06:30,328 - INFO - Epoch: 13/500, Iter: 7/119 -- train_loss: 1.3503 
2025-08-11 19:06:30,576 - INFO - Epoch: 13/500, Iter: 8/119 -- train_loss: 1.3773 
2025-08-11 19:06:30,823 - INFO - Epoch: 13/500, Iter: 9/119 -- train_loss: 1.4169 
2025-08-11 19:06:31,051 - INFO - Epoch: 13/500, Iter: 10/119 -- train_loss: 1.3555 
2025-08-11 19:06:31,322 - INFO - Epoch: 13/500, Iter: 11/119 -- train_loss: 1.2326 
2025-08-11 19:06:31,668 - INFO - Epoch: 13/500, Iter: 12/119 -- train_loss: 1.3582 
2025-08-11 19:06:31,892 - INFO - Epoch: 13/500, Iter: 13/119 -- train_loss: 1.2924 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:09:18,138 - INFO - Epoch: 14/500, Iter: 1/119 -- train_loss: 1.2972 


[1/119]   1%|           [00:00<?]

2025-08-11 19:09:18,412 - INFO - Epoch: 14/500, Iter: 2/119 -- train_loss: 1.2468 
2025-08-11 19:09:19,007 - INFO - Epoch: 14/500, Iter: 3/119 -- train_loss: 1.3401 
2025-08-11 19:09:19,264 - INFO - Epoch: 14/500, Iter: 4/119 -- train_loss: 1.2702 
2025-08-11 19:09:19,546 - INFO - Epoch: 14/500, Iter: 5/119 -- train_loss: 1.4103 
2025-08-11 19:09:19,769 - INFO - Epoch: 14/500, Iter: 6/119 -- train_loss: 1.3005 
2025-08-11 19:09:20,041 - INFO - Epoch: 14/500, Iter: 7/119 -- train_loss: 1.3827 
2025-08-11 19:09:20,314 - INFO - Epoch: 14/500, Iter: 8/119 -- train_loss: 1.3263 
2025-08-11 19:09:21,004 - INFO - Epoch: 14/500, Iter: 9/119 -- train_loss: 1.3863 
2025-08-11 19:09:23,679 - INFO - Epoch: 14/500, Iter: 10/119 -- train_loss: 1.3930 
2025-08-11 19:09:23,969 - INFO - Epoch: 14/500, Iter: 11/119 -- train_loss: 1.3239 
2025-08-11 19:09:24,199 - INFO - Epoch: 14/500, Iter: 12/119 -- train_loss: 1.3717 
2025-08-11 19:09:25,931 - INFO - Epoch: 14/500, Iter: 13/119 -- train_loss: 1.5035 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:12:10,112 - INFO - Epoch: 15/500, Iter: 1/119 -- train_loss: 1.4595 


[1/119]   1%|           [00:00<?]

2025-08-11 19:12:10,329 - INFO - Epoch: 15/500, Iter: 2/119 -- train_loss: 1.3086 
2025-08-11 19:12:10,582 - INFO - Epoch: 15/500, Iter: 3/119 -- train_loss: 1.3888 
2025-08-11 19:12:10,811 - INFO - Epoch: 15/500, Iter: 4/119 -- train_loss: 1.3913 
2025-08-11 19:12:11,062 - INFO - Epoch: 15/500, Iter: 5/119 -- train_loss: 1.2762 
2025-08-11 19:12:11,296 - INFO - Epoch: 15/500, Iter: 6/119 -- train_loss: 1.3265 
2025-08-11 19:12:11,572 - INFO - Epoch: 15/500, Iter: 7/119 -- train_loss: 1.3900 
2025-08-11 19:12:11,812 - INFO - Epoch: 15/500, Iter: 8/119 -- train_loss: 1.3652 
2025-08-11 19:12:14,796 - INFO - Epoch: 15/500, Iter: 9/119 -- train_loss: 1.4495 
2025-08-11 19:12:15,041 - INFO - Epoch: 15/500, Iter: 10/119 -- train_loss: 1.3287 
2025-08-11 19:12:15,267 - INFO - Epoch: 15/500, Iter: 11/119 -- train_loss: 1.3363 
2025-08-11 19:12:15,495 - INFO - Epoch: 15/500, Iter: 12/119 -- train_loss: 1.2558 
2025-08-11 19:12:15,713 - INFO - Epoch: 15/500, Iter: 13/119 -- train_loss: 1.3172 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:15:01,261 - INFO - Epoch: 16/500, Iter: 1/119 -- train_loss: 1.3007 


[1/119]   1%|           [00:00<?]

2025-08-11 19:15:01,503 - INFO - Epoch: 16/500, Iter: 2/119 -- train_loss: 1.3369 
2025-08-11 19:15:02,723 - INFO - Epoch: 16/500, Iter: 3/119 -- train_loss: 1.4570 
2025-08-11 19:15:02,928 - INFO - Epoch: 16/500, Iter: 4/119 -- train_loss: 1.3166 
2025-08-11 19:15:03,187 - INFO - Epoch: 16/500, Iter: 5/119 -- train_loss: 1.3487 
2025-08-11 19:15:03,441 - INFO - Epoch: 16/500, Iter: 6/119 -- train_loss: 1.4085 
2025-08-11 19:15:03,665 - INFO - Epoch: 16/500, Iter: 7/119 -- train_loss: 1.3361 
2025-08-11 19:15:03,934 - INFO - Epoch: 16/500, Iter: 8/119 -- train_loss: 1.4426 
2025-08-11 19:15:06,340 - INFO - Epoch: 16/500, Iter: 9/119 -- train_loss: 1.3029 
2025-08-11 19:15:06,572 - INFO - Epoch: 16/500, Iter: 10/119 -- train_loss: 1.4458 
2025-08-11 19:15:06,805 - INFO - Epoch: 16/500, Iter: 11/119 -- train_loss: 1.2450 
2025-08-11 19:15:07,021 - INFO - Epoch: 16/500, Iter: 12/119 -- train_loss: 1.3863 
2025-08-11 19:15:07,232 - INFO - Epoch: 16/500, Iter: 13/119 -- train_loss: 1.3342 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:17:51,529 - INFO - Epoch: 17/500, Iter: 1/119 -- train_loss: 1.3583 


[1/119]   1%|           [00:00<?]

2025-08-11 19:17:51,773 - INFO - Epoch: 17/500, Iter: 2/119 -- train_loss: 1.2884 
2025-08-11 19:17:51,995 - INFO - Epoch: 17/500, Iter: 3/119 -- train_loss: 1.2720 
2025-08-11 19:17:52,362 - INFO - Epoch: 17/500, Iter: 4/119 -- train_loss: 1.3804 
2025-08-11 19:17:52,612 - INFO - Epoch: 17/500, Iter: 5/119 -- train_loss: 1.2498 
2025-08-11 19:17:52,849 - INFO - Epoch: 17/500, Iter: 6/119 -- train_loss: 1.3753 
2025-08-11 19:17:53,092 - INFO - Epoch: 17/500, Iter: 7/119 -- train_loss: 1.3113 
2025-08-11 19:17:53,341 - INFO - Epoch: 17/500, Iter: 8/119 -- train_loss: 1.3997 
2025-08-11 19:17:54,675 - INFO - Epoch: 17/500, Iter: 9/119 -- train_loss: 1.4503 
2025-08-11 19:17:54,912 - INFO - Epoch: 17/500, Iter: 10/119 -- train_loss: 1.2553 
2025-08-11 19:17:55,172 - INFO - Epoch: 17/500, Iter: 11/119 -- train_loss: 1.3283 
2025-08-11 19:17:55,439 - INFO - Epoch: 17/500, Iter: 12/119 -- train_loss: 1.3817 
2025-08-11 19:17:55,711 - INFO - Epoch: 17/500, Iter: 13/119 -- train_loss: 1.3279 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:20:43,287 - INFO - Epoch: 18/500, Iter: 1/119 -- train_loss: 1.3380 


[1/119]   1%|           [00:00<?]

2025-08-11 19:20:43,520 - INFO - Epoch: 18/500, Iter: 2/119 -- train_loss: 1.2949 
2025-08-11 19:20:43,786 - INFO - Epoch: 18/500, Iter: 3/119 -- train_loss: 1.4455 
2025-08-11 19:20:44,020 - INFO - Epoch: 18/500, Iter: 4/119 -- train_loss: 1.3240 
2025-08-11 19:20:44,253 - INFO - Epoch: 18/500, Iter: 5/119 -- train_loss: 1.2949 
2025-08-11 19:20:44,520 - INFO - Epoch: 18/500, Iter: 6/119 -- train_loss: 1.2769 
2025-08-11 19:20:44,770 - INFO - Epoch: 18/500, Iter: 7/119 -- train_loss: 1.3750 
2025-08-11 19:20:45,036 - INFO - Epoch: 18/500, Iter: 8/119 -- train_loss: 1.3116 
2025-08-11 19:20:45,625 - INFO - Epoch: 18/500, Iter: 9/119 -- train_loss: 1.3817 
2025-08-11 19:20:47,362 - INFO - Epoch: 18/500, Iter: 10/119 -- train_loss: 1.3771 
2025-08-11 19:20:47,586 - INFO - Epoch: 18/500, Iter: 11/119 -- train_loss: 1.3729 
2025-08-11 19:20:47,870 - INFO - Epoch: 18/500, Iter: 12/119 -- train_loss: 1.3086 
2025-08-11 19:20:48,115 - INFO - Epoch: 18/500, Iter: 13/119 -- train_loss: 1.3925 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:23:36,665 - INFO - Epoch: 19/500, Iter: 1/119 -- train_loss: 1.4014 


[1/119]   1%|           [00:00<?]

2025-08-11 19:23:38,035 - INFO - Epoch: 19/500, Iter: 2/119 -- train_loss: 1.3096 
2025-08-11 19:23:38,301 - INFO - Epoch: 19/500, Iter: 3/119 -- train_loss: 1.3769 
2025-08-11 19:23:38,559 - INFO - Epoch: 19/500, Iter: 4/119 -- train_loss: 1.3363 
2025-08-11 19:23:38,833 - INFO - Epoch: 19/500, Iter: 5/119 -- train_loss: 1.3068 
2025-08-11 19:23:40,493 - INFO - Epoch: 19/500, Iter: 6/119 -- train_loss: 1.4422 
2025-08-11 19:23:40,741 - INFO - Epoch: 19/500, Iter: 7/119 -- train_loss: 1.2615 
2025-08-11 19:23:40,989 - INFO - Epoch: 19/500, Iter: 8/119 -- train_loss: 1.2918 
2025-08-11 19:23:41,247 - INFO - Epoch: 19/500, Iter: 9/119 -- train_loss: 1.3021 
2025-08-11 19:23:44,722 - INFO - Epoch: 19/500, Iter: 10/119 -- train_loss: 1.5253 
2025-08-11 19:23:44,963 - INFO - Epoch: 19/500, Iter: 11/119 -- train_loss: 1.3461 
2025-08-11 19:23:45,166 - INFO - Epoch: 19/500, Iter: 12/119 -- train_loss: 1.4838 
2025-08-11 19:23:45,431 - INFO - Epoch: 19/500, Iter: 13/119 -- train_loss: 1.4838 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:26:23,822 - INFO - Epoch: 20/500, Iter: 1/119 -- train_loss: 1.3332 


[1/119]   1%|           [00:00<?]

2025-08-11 19:26:25,550 - INFO - Epoch: 20/500, Iter: 2/119 -- train_loss: 1.3186 
2025-08-11 19:26:25,763 - INFO - Epoch: 20/500, Iter: 3/119 -- train_loss: 1.3116 
2025-08-11 19:26:26,009 - INFO - Epoch: 20/500, Iter: 4/119 -- train_loss: 1.2673 
2025-08-11 19:26:26,987 - INFO - Epoch: 20/500, Iter: 5/119 -- train_loss: 1.3342 
2025-08-11 19:26:27,249 - INFO - Epoch: 20/500, Iter: 6/119 -- train_loss: 1.3707 
2025-08-11 19:26:27,506 - INFO - Epoch: 20/500, Iter: 7/119 -- train_loss: 1.3023 
2025-08-11 19:26:27,755 - INFO - Epoch: 20/500, Iter: 8/119 -- train_loss: 1.3154 
2025-08-11 19:26:27,996 - INFO - Epoch: 20/500, Iter: 9/119 -- train_loss: 1.3358 
2025-08-11 19:26:29,671 - INFO - Epoch: 20/500, Iter: 10/119 -- train_loss: 1.4148 
2025-08-11 19:26:29,904 - INFO - Epoch: 20/500, Iter: 11/119 -- train_loss: 1.3285 
2025-08-11 19:26:30,171 - INFO - Epoch: 20/500, Iter: 12/119 -- train_loss: 1.2583 
2025-08-11 19:26:30,670 - INFO - Epoch: 20/500, Iter: 13/119 -- train_loss: 1.3171 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:29:14,948 - INFO - Epoch: 21/500, Iter: 1/119 -- train_loss: 1.3603 


[1/119]   1%|           [00:00<?]

2025-08-11 19:29:15,410 - INFO - Epoch: 21/500, Iter: 2/119 -- train_loss: 1.4226 
2025-08-11 19:29:15,651 - INFO - Epoch: 21/500, Iter: 3/119 -- train_loss: 1.3980 
2025-08-11 19:29:15,918 - INFO - Epoch: 21/500, Iter: 4/119 -- train_loss: 1.3523 
2025-08-11 19:29:17,533 - INFO - Epoch: 21/500, Iter: 5/119 -- train_loss: 1.3221 
2025-08-11 19:29:17,750 - INFO - Epoch: 21/500, Iter: 6/119 -- train_loss: 1.3946 
2025-08-11 19:29:18,028 - INFO - Epoch: 21/500, Iter: 7/119 -- train_loss: 1.3733 
2025-08-11 19:29:18,259 - INFO - Epoch: 21/500, Iter: 8/119 -- train_loss: 1.3427 
2025-08-11 19:29:19,754 - INFO - Epoch: 21/500, Iter: 9/119 -- train_loss: 1.3046 
2025-08-11 19:29:19,997 - INFO - Epoch: 21/500, Iter: 10/119 -- train_loss: 1.3389 
2025-08-11 19:29:20,231 - INFO - Epoch: 21/500, Iter: 11/119 -- train_loss: 1.3447 
2025-08-11 19:29:20,514 - INFO - Epoch: 21/500, Iter: 12/119 -- train_loss: 1.4571 
2025-08-11 19:29:20,761 - INFO - Epoch: 21/500, Iter: 13/119 -- train_loss: 1.3229 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:32:09,506 - INFO - Epoch: 22/500, Iter: 1/119 -- train_loss: 1.3765 


[1/119]   1%|           [00:00<?]

2025-08-11 19:32:09,769 - INFO - Epoch: 22/500, Iter: 2/119 -- train_loss: 1.2361 
2025-08-11 19:32:10,008 - INFO - Epoch: 22/500, Iter: 3/119 -- train_loss: 1.3163 
2025-08-11 19:32:10,264 - INFO - Epoch: 22/500, Iter: 4/119 -- train_loss: 1.4110 
2025-08-11 19:32:10,511 - INFO - Epoch: 22/500, Iter: 5/119 -- train_loss: 1.3082 
2025-08-11 19:32:10,740 - INFO - Epoch: 22/500, Iter: 6/119 -- train_loss: 1.3276 
2025-08-11 19:32:11,001 - INFO - Epoch: 22/500, Iter: 7/119 -- train_loss: 1.2722 
2025-08-11 19:32:11,237 - INFO - Epoch: 22/500, Iter: 8/119 -- train_loss: 1.4144 
2025-08-11 19:32:13,790 - INFO - Epoch: 22/500, Iter: 9/119 -- train_loss: 1.3176 
2025-08-11 19:32:14,020 - INFO - Epoch: 22/500, Iter: 10/119 -- train_loss: 1.3880 
2025-08-11 19:32:14,246 - INFO - Epoch: 22/500, Iter: 11/119 -- train_loss: 1.3197 
2025-08-11 19:32:15,790 - INFO - Epoch: 22/500, Iter: 12/119 -- train_loss: 1.3835 
2025-08-11 19:32:16,022 - INFO - Epoch: 22/500, Iter: 13/119 -- train_loss: 1.4118 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:35:01,677 - INFO - Epoch: 23/500, Iter: 1/119 -- train_loss: 1.4041 


[1/119]   1%|           [00:00<?]

2025-08-11 19:35:01,911 - INFO - Epoch: 23/500, Iter: 2/119 -- train_loss: 1.4098 
2025-08-11 19:35:02,144 - INFO - Epoch: 23/500, Iter: 3/119 -- train_loss: 1.3955 
2025-08-11 19:35:02,429 - INFO - Epoch: 23/500, Iter: 4/119 -- train_loss: 1.3292 
2025-08-11 19:35:02,648 - INFO - Epoch: 23/500, Iter: 5/119 -- train_loss: 1.3471 
2025-08-11 19:35:02,881 - INFO - Epoch: 23/500, Iter: 6/119 -- train_loss: 1.3862 
2025-08-11 19:35:03,223 - INFO - Epoch: 23/500, Iter: 7/119 -- train_loss: 1.3334 
2025-08-11 19:35:03,459 - INFO - Epoch: 23/500, Iter: 8/119 -- train_loss: 1.3435 
2025-08-11 19:35:05,721 - INFO - Epoch: 23/500, Iter: 9/119 -- train_loss: 1.3716 
2025-08-11 19:35:05,975 - INFO - Epoch: 23/500, Iter: 10/119 -- train_loss: 1.2439 
2025-08-11 19:35:06,665 - INFO - Epoch: 23/500, Iter: 11/119 -- train_loss: 1.3758 
2025-08-11 19:35:06,914 - INFO - Epoch: 23/500, Iter: 12/119 -- train_loss: 1.3009 
2025-08-11 19:35:07,158 - INFO - Epoch: 23/500, Iter: 13/119 -- train_loss: 1.4531 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:37:53,811 - INFO - Epoch: 24/500, Iter: 1/119 -- train_loss: 1.3837 


[1/119]   1%|           [00:00<?]

2025-08-11 19:37:54,065 - INFO - Epoch: 24/500, Iter: 2/119 -- train_loss: 1.3262 
2025-08-11 19:37:54,343 - INFO - Epoch: 24/500, Iter: 3/119 -- train_loss: 1.4893 
2025-08-11 19:37:54,603 - INFO - Epoch: 24/500, Iter: 4/119 -- train_loss: 1.2832 
2025-08-11 19:37:54,889 - INFO - Epoch: 24/500, Iter: 5/119 -- train_loss: 1.3766 
2025-08-11 19:37:55,123 - INFO - Epoch: 24/500, Iter: 6/119 -- train_loss: 1.3779 
2025-08-11 19:37:55,350 - INFO - Epoch: 24/500, Iter: 7/119 -- train_loss: 1.3623 
2025-08-11 19:37:55,594 - INFO - Epoch: 24/500, Iter: 8/119 -- train_loss: 1.3139 
2025-08-11 19:37:56,223 - INFO - Epoch: 24/500, Iter: 9/119 -- train_loss: 1.3396 
2025-08-11 19:37:56,484 - INFO - Epoch: 24/500, Iter: 10/119 -- train_loss: 1.4118 
2025-08-11 19:37:57,142 - INFO - Epoch: 24/500, Iter: 11/119 -- train_loss: 1.2911 
2025-08-11 19:37:57,429 - INFO - Epoch: 24/500, Iter: 12/119 -- train_loss: 1.3544 
2025-08-11 19:37:59,463 - INFO - Epoch: 24/500, Iter: 13/119 -- train_loss: 1.3744 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:40:43,318 - INFO - Epoch: 25/500, Iter: 1/119 -- train_loss: 1.3311 


[1/119]   1%|           [00:00<?]

2025-08-11 19:40:44,075 - INFO - Epoch: 25/500, Iter: 2/119 -- train_loss: 1.4543 
2025-08-11 19:40:44,305 - INFO - Epoch: 25/500, Iter: 3/119 -- train_loss: 1.3405 
2025-08-11 19:40:44,557 - INFO - Epoch: 25/500, Iter: 4/119 -- train_loss: 1.3945 
2025-08-11 19:40:44,798 - INFO - Epoch: 25/500, Iter: 5/119 -- train_loss: 1.3637 
2025-08-11 19:40:45,049 - INFO - Epoch: 25/500, Iter: 6/119 -- train_loss: 1.4154 
2025-08-11 19:40:45,302 - INFO - Epoch: 25/500, Iter: 7/119 -- train_loss: 1.3676 
2025-08-11 19:40:45,524 - INFO - Epoch: 25/500, Iter: 8/119 -- train_loss: 1.3564 
2025-08-11 19:40:45,980 - INFO - Epoch: 25/500, Iter: 9/119 -- train_loss: 1.3041 
2025-08-11 19:40:47,306 - INFO - Epoch: 25/500, Iter: 10/119 -- train_loss: 1.3693 
2025-08-11 19:40:47,564 - INFO - Epoch: 25/500, Iter: 11/119 -- train_loss: 1.4033 
2025-08-11 19:40:47,832 - INFO - Epoch: 25/500, Iter: 12/119 -- train_loss: 1.4008 
2025-08-11 19:40:49,647 - INFO - Epoch: 25/500, Iter: 13/119 -- train_loss: 1.5205 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:43:34,709 - INFO - Epoch: 26/500, Iter: 1/119 -- train_loss: 1.4298 


[1/119]   1%|           [00:00<?]

2025-08-11 19:43:34,963 - INFO - Epoch: 26/500, Iter: 2/119 -- train_loss: 1.3730 
2025-08-11 19:43:35,208 - INFO - Epoch: 26/500, Iter: 3/119 -- train_loss: 1.4243 
2025-08-11 19:43:36,877 - INFO - Epoch: 26/500, Iter: 4/119 -- train_loss: 1.3091 
2025-08-11 19:43:37,110 - INFO - Epoch: 26/500, Iter: 5/119 -- train_loss: 1.4371 
2025-08-11 19:43:37,330 - INFO - Epoch: 26/500, Iter: 6/119 -- train_loss: 1.2791 
2025-08-11 19:43:37,561 - INFO - Epoch: 26/500, Iter: 7/119 -- train_loss: 1.2644 
2025-08-11 19:43:37,837 - INFO - Epoch: 26/500, Iter: 8/119 -- train_loss: 1.3985 
2025-08-11 19:43:38,098 - INFO - Epoch: 26/500, Iter: 9/119 -- train_loss: 1.3328 
2025-08-11 19:43:38,328 - INFO - Epoch: 26/500, Iter: 10/119 -- train_loss: 1.2515 
2025-08-11 19:43:38,555 - INFO - Epoch: 26/500, Iter: 11/119 -- train_loss: 1.3596 
2025-08-11 19:43:39,216 - INFO - Epoch: 26/500, Iter: 12/119 -- train_loss: 1.3950 
2025-08-11 19:43:39,462 - INFO - Epoch: 26/500, Iter: 13/119 -- train_loss: 1.3246 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:46:21,273 - INFO - Epoch: 27/500, Iter: 1/119 -- train_loss: 1.2455 


[1/119]   1%|           [00:00<?]

2025-08-11 19:46:21,551 - INFO - Epoch: 27/500, Iter: 2/119 -- train_loss: 1.3587 
2025-08-11 19:46:21,934 - INFO - Epoch: 27/500, Iter: 3/119 -- train_loss: 1.3463 
2025-08-11 19:46:22,178 - INFO - Epoch: 27/500, Iter: 4/119 -- train_loss: 1.4051 
2025-08-11 19:46:22,407 - INFO - Epoch: 27/500, Iter: 5/119 -- train_loss: 1.2985 
2025-08-11 19:46:22,649 - INFO - Epoch: 27/500, Iter: 6/119 -- train_loss: 1.2949 
2025-08-11 19:46:22,894 - INFO - Epoch: 27/500, Iter: 7/119 -- train_loss: 1.4282 
2025-08-11 19:46:23,145 - INFO - Epoch: 27/500, Iter: 8/119 -- train_loss: 1.2633 
2025-08-11 19:46:25,753 - INFO - Epoch: 27/500, Iter: 9/119 -- train_loss: 1.3153 
2025-08-11 19:46:26,027 - INFO - Epoch: 27/500, Iter: 10/119 -- train_loss: 1.2989 
2025-08-11 19:46:26,311 - INFO - Epoch: 27/500, Iter: 11/119 -- train_loss: 1.3126 
2025-08-11 19:46:26,554 - INFO - Epoch: 27/500, Iter: 12/119 -- train_loss: 1.3446 
2025-08-11 19:46:26,785 - INFO - Epoch: 27/500, Iter: 13/119 -- train_loss: 1.3545 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:49:09,020 - INFO - Epoch: 28/500, Iter: 1/119 -- train_loss: 1.3796 


[1/119]   1%|           [00:00<?]

2025-08-11 19:49:10,186 - INFO - Epoch: 28/500, Iter: 2/119 -- train_loss: 1.3479 
2025-08-11 19:49:11,313 - INFO - Epoch: 28/500, Iter: 3/119 -- train_loss: 1.2721 
2025-08-11 19:49:12,002 - INFO - Epoch: 28/500, Iter: 4/119 -- train_loss: 1.2975 
2025-08-11 19:49:12,219 - INFO - Epoch: 28/500, Iter: 5/119 -- train_loss: 1.2178 
2025-08-11 19:49:12,436 - INFO - Epoch: 28/500, Iter: 6/119 -- train_loss: 1.3648 
2025-08-11 19:49:12,690 - INFO - Epoch: 28/500, Iter: 7/119 -- train_loss: 1.3073 
2025-08-11 19:49:12,936 - INFO - Epoch: 28/500, Iter: 8/119 -- train_loss: 1.3311 
2025-08-11 19:49:13,563 - INFO - Epoch: 28/500, Iter: 9/119 -- train_loss: 1.3058 
2025-08-11 19:49:13,867 - INFO - Epoch: 28/500, Iter: 10/119 -- train_loss: 1.4356 
2025-08-11 19:49:14,128 - INFO - Epoch: 28/500, Iter: 11/119 -- train_loss: 1.3507 
2025-08-11 19:49:18,969 - INFO - Epoch: 28/500, Iter: 12/119 -- train_loss: 1.4616 
2025-08-11 19:49:19,202 - INFO - Epoch: 28/500, Iter: 13/119 -- train_loss: 1.3273 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:51:57,407 - INFO - Epoch: 29/500, Iter: 1/119 -- train_loss: 1.3797 


[1/119]   1%|           [00:00<?]

2025-08-11 19:51:57,753 - INFO - Epoch: 29/500, Iter: 2/119 -- train_loss: 1.2978 
2025-08-11 19:51:57,985 - INFO - Epoch: 29/500, Iter: 3/119 -- train_loss: 1.3405 
2025-08-11 19:52:00,771 - INFO - Epoch: 29/500, Iter: 4/119 -- train_loss: 1.3707 
2025-08-11 19:52:00,995 - INFO - Epoch: 29/500, Iter: 5/119 -- train_loss: 1.3527 
2025-08-11 19:52:01,227 - INFO - Epoch: 29/500, Iter: 6/119 -- train_loss: 1.2904 
2025-08-11 19:52:01,465 - INFO - Epoch: 29/500, Iter: 7/119 -- train_loss: 1.3382 
2025-08-11 19:52:01,679 - INFO - Epoch: 29/500, Iter: 8/119 -- train_loss: 1.3752 
2025-08-11 19:52:01,962 - INFO - Epoch: 29/500, Iter: 9/119 -- train_loss: 1.3596 
2025-08-11 19:52:03,504 - INFO - Epoch: 29/500, Iter: 10/119 -- train_loss: 1.3904 
2025-08-11 19:52:03,737 - INFO - Epoch: 29/500, Iter: 11/119 -- train_loss: 1.3563 
2025-08-11 19:52:03,983 - INFO - Epoch: 29/500, Iter: 12/119 -- train_loss: 1.3116 
2025-08-11 19:52:04,217 - INFO - Epoch: 29/500, Iter: 13/119 -- train_loss: 1.2935 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:54:51,533 - INFO - Epoch: 30/500, Iter: 1/119 -- train_loss: 1.3823 


[1/119]   1%|           [00:00<?]

2025-08-11 19:54:51,779 - INFO - Epoch: 30/500, Iter: 2/119 -- train_loss: 1.4887 
2025-08-11 19:54:51,987 - INFO - Epoch: 30/500, Iter: 3/119 -- train_loss: 1.3788 
2025-08-11 19:54:52,238 - INFO - Epoch: 30/500, Iter: 4/119 -- train_loss: 1.3232 
2025-08-11 19:54:55,619 - INFO - Epoch: 30/500, Iter: 5/119 -- train_loss: 1.3183 
2025-08-11 19:54:55,820 - INFO - Epoch: 30/500, Iter: 6/119 -- train_loss: 1.3075 
2025-08-11 19:54:56,058 - INFO - Epoch: 30/500, Iter: 7/119 -- train_loss: 1.4392 
2025-08-11 19:54:56,272 - INFO - Epoch: 30/500, Iter: 8/119 -- train_loss: 1.3755 
2025-08-11 19:54:56,485 - INFO - Epoch: 30/500, Iter: 9/119 -- train_loss: 1.2173 
2025-08-11 19:54:56,707 - INFO - Epoch: 30/500, Iter: 10/119 -- train_loss: 1.3761 
2025-08-11 19:54:56,976 - INFO - Epoch: 30/500, Iter: 11/119 -- train_loss: 1.3351 
2025-08-11 19:54:57,213 - INFO - Epoch: 30/500, Iter: 12/119 -- train_loss: 1.3787 
2025-08-11 19:54:57,661 - INFO - Epoch: 30/500, Iter: 13/119 -- train_loss: 1.3037 


[1/20]   5%|5          [00:00<?]

2025-08-11 19:57:37,280 - INFO - Epoch: 31/500, Iter: 1/119 -- train_loss: 1.3998 


[1/119]   1%|           [00:00<?]

2025-08-11 19:57:38,588 - INFO - Epoch: 31/500, Iter: 2/119 -- train_loss: 1.3122 
2025-08-11 19:57:38,851 - INFO - Epoch: 31/500, Iter: 3/119 -- train_loss: 1.4238 
2025-08-11 19:57:39,107 - INFO - Epoch: 31/500, Iter: 4/119 -- train_loss: 1.3225 
2025-08-11 19:57:39,348 - INFO - Epoch: 31/500, Iter: 5/119 -- train_loss: 1.3282 
2025-08-11 19:57:39,596 - INFO - Epoch: 31/500, Iter: 6/119 -- train_loss: 1.3700 
2025-08-11 19:57:39,857 - INFO - Epoch: 31/500, Iter: 7/119 -- train_loss: 1.3694 
2025-08-11 19:57:40,088 - INFO - Epoch: 31/500, Iter: 8/119 -- train_loss: 1.3439 
2025-08-11 19:57:42,296 - INFO - Epoch: 31/500, Iter: 9/119 -- train_loss: 1.3071 
2025-08-11 19:57:42,582 - INFO - Epoch: 31/500, Iter: 10/119 -- train_loss: 1.2130 
2025-08-11 19:57:42,806 - INFO - Epoch: 31/500, Iter: 11/119 -- train_loss: 1.2809 
2025-08-11 19:57:43,264 - INFO - Epoch: 31/500, Iter: 12/119 -- train_loss: 1.3713 
2025-08-11 19:57:43,501 - INFO - Epoch: 31/500, Iter: 13/119 -- train_loss: 1.4166 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:00:32,375 - INFO - Epoch: 32/500, Iter: 1/119 -- train_loss: 1.3699 


[1/119]   1%|           [00:00<?]

2025-08-11 20:00:34,341 - INFO - Epoch: 32/500, Iter: 2/119 -- train_loss: 1.3710 
2025-08-11 20:00:34,570 - INFO - Epoch: 32/500, Iter: 3/119 -- train_loss: 1.2717 
2025-08-11 20:00:34,809 - INFO - Epoch: 32/500, Iter: 4/119 -- train_loss: 1.3433 
2025-08-11 20:00:35,024 - INFO - Epoch: 32/500, Iter: 5/119 -- train_loss: 1.4338 
2025-08-11 20:00:35,264 - INFO - Epoch: 32/500, Iter: 6/119 -- train_loss: 1.2641 
2025-08-11 20:00:35,519 - INFO - Epoch: 32/500, Iter: 7/119 -- train_loss: 1.4262 
2025-08-11 20:00:35,742 - INFO - Epoch: 32/500, Iter: 8/119 -- train_loss: 1.3154 
2025-08-11 20:00:35,980 - INFO - Epoch: 32/500, Iter: 9/119 -- train_loss: 1.3149 
2025-08-11 20:00:36,467 - INFO - Epoch: 32/500, Iter: 10/119 -- train_loss: 1.2890 
2025-08-11 20:00:36,733 - INFO - Epoch: 32/500, Iter: 11/119 -- train_loss: 1.3699 
2025-08-11 20:00:37,020 - INFO - Epoch: 32/500, Iter: 12/119 -- train_loss: 1.3610 
2025-08-11 20:00:37,260 - INFO - Epoch: 32/500, Iter: 13/119 -- train_loss: 1.3734 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:03:23,481 - INFO - Epoch: 33/500, Iter: 1/119 -- train_loss: 1.3079 


[1/119]   1%|           [00:00<?]

2025-08-11 20:03:23,748 - INFO - Epoch: 33/500, Iter: 2/119 -- train_loss: 1.2190 
2025-08-11 20:03:23,974 - INFO - Epoch: 33/500, Iter: 3/119 -- train_loss: 1.3991 
2025-08-11 20:03:24,219 - INFO - Epoch: 33/500, Iter: 4/119 -- train_loss: 1.3292 
2025-08-11 20:03:24,926 - INFO - Epoch: 33/500, Iter: 5/119 -- train_loss: 1.3191 
2025-08-11 20:03:25,164 - INFO - Epoch: 33/500, Iter: 6/119 -- train_loss: 1.2575 
2025-08-11 20:03:25,416 - INFO - Epoch: 33/500, Iter: 7/119 -- train_loss: 1.3626 
2025-08-11 20:03:25,696 - INFO - Epoch: 33/500, Iter: 8/119 -- train_loss: 1.3560 
2025-08-11 20:03:30,287 - INFO - Epoch: 33/500, Iter: 9/119 -- train_loss: 1.3271 
2025-08-11 20:03:30,521 - INFO - Epoch: 33/500, Iter: 10/119 -- train_loss: 1.2964 
2025-08-11 20:03:30,733 - INFO - Epoch: 33/500, Iter: 11/119 -- train_loss: 1.2978 
2025-08-11 20:03:30,967 - INFO - Epoch: 33/500, Iter: 12/119 -- train_loss: 1.5739 
2025-08-11 20:03:31,213 - INFO - Epoch: 33/500, Iter: 13/119 -- train_loss: 1.4167 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:06:13,671 - INFO - Epoch: 34/500, Iter: 1/119 -- train_loss: 1.3868 


[1/119]   1%|           [00:00<?]

2025-08-11 20:06:13,922 - INFO - Epoch: 34/500, Iter: 2/119 -- train_loss: 1.3919 
2025-08-11 20:06:14,422 - INFO - Epoch: 34/500, Iter: 3/119 -- train_loss: 1.2582 
2025-08-11 20:06:14,688 - INFO - Epoch: 34/500, Iter: 4/119 -- train_loss: 1.2435 
2025-08-11 20:06:14,942 - INFO - Epoch: 34/500, Iter: 5/119 -- train_loss: 1.3980 
2025-08-11 20:06:15,194 - INFO - Epoch: 34/500, Iter: 6/119 -- train_loss: 1.4174 
2025-08-11 20:06:15,427 - INFO - Epoch: 34/500, Iter: 7/119 -- train_loss: 1.4419 
2025-08-11 20:06:15,688 - INFO - Epoch: 34/500, Iter: 8/119 -- train_loss: 1.2853 
2025-08-11 20:06:15,956 - INFO - Epoch: 34/500, Iter: 9/119 -- train_loss: 1.3036 
2025-08-11 20:06:18,409 - INFO - Epoch: 34/500, Iter: 10/119 -- train_loss: 1.3451 
2025-08-11 20:06:18,668 - INFO - Epoch: 34/500, Iter: 11/119 -- train_loss: 1.2784 
2025-08-11 20:06:18,921 - INFO - Epoch: 34/500, Iter: 12/119 -- train_loss: 1.4203 
2025-08-11 20:06:20,716 - INFO - Epoch: 34/500, Iter: 13/119 -- train_loss: 1.4016 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:09:06,713 - INFO - Epoch: 35/500, Iter: 1/119 -- train_loss: 1.4062 


[1/119]   1%|           [00:00<?]

2025-08-11 20:09:06,949 - INFO - Epoch: 35/500, Iter: 2/119 -- train_loss: 1.3713 
2025-08-11 20:09:07,214 - INFO - Epoch: 35/500, Iter: 3/119 -- train_loss: 1.3037 
2025-08-11 20:09:07,432 - INFO - Epoch: 35/500, Iter: 4/119 -- train_loss: 1.4201 
2025-08-11 20:09:07,698 - INFO - Epoch: 35/500, Iter: 5/119 -- train_loss: 1.3693 
2025-08-11 20:09:07,937 - INFO - Epoch: 35/500, Iter: 6/119 -- train_loss: 1.2521 
2025-08-11 20:09:08,164 - INFO - Epoch: 35/500, Iter: 7/119 -- train_loss: 1.4767 
2025-08-11 20:09:08,431 - INFO - Epoch: 35/500, Iter: 8/119 -- train_loss: 1.4098 
2025-08-11 20:09:11,113 - INFO - Epoch: 35/500, Iter: 9/119 -- train_loss: 1.3485 
2025-08-11 20:09:11,388 - INFO - Epoch: 35/500, Iter: 10/119 -- train_loss: 1.4139 
2025-08-11 20:09:11,629 - INFO - Epoch: 35/500, Iter: 11/119 -- train_loss: 1.4218 
2025-08-11 20:09:11,881 - INFO - Epoch: 35/500, Iter: 12/119 -- train_loss: 1.4024 
2025-08-11 20:09:12,117 - INFO - Epoch: 35/500, Iter: 13/119 -- train_loss: 1.3659 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:11:55,466 - INFO - Epoch: 36/500, Iter: 1/119 -- train_loss: 1.4484 


[1/119]   1%|           [00:00<?]

2025-08-11 20:11:56,222 - INFO - Epoch: 36/500, Iter: 2/119 -- train_loss: 1.4087 
2025-08-11 20:11:56,954 - INFO - Epoch: 36/500, Iter: 3/119 -- train_loss: 1.7278 
2025-08-11 20:11:57,206 - INFO - Epoch: 36/500, Iter: 4/119 -- train_loss: 1.3464 
2025-08-11 20:11:59,479 - INFO - Epoch: 36/500, Iter: 5/119 -- train_loss: 1.4268 
2025-08-11 20:11:59,699 - INFO - Epoch: 36/500, Iter: 6/119 -- train_loss: 1.3742 
2025-08-11 20:11:59,924 - INFO - Epoch: 36/500, Iter: 7/119 -- train_loss: 1.3814 
2025-08-11 20:12:00,139 - INFO - Epoch: 36/500, Iter: 8/119 -- train_loss: 1.2848 
2025-08-11 20:12:00,375 - INFO - Epoch: 36/500, Iter: 9/119 -- train_loss: 1.2398 
2025-08-11 20:12:00,621 - INFO - Epoch: 36/500, Iter: 10/119 -- train_loss: 1.2970 
2025-08-11 20:12:00,904 - INFO - Epoch: 36/500, Iter: 11/119 -- train_loss: 1.3106 
2025-08-11 20:12:01,136 - INFO - Epoch: 36/500, Iter: 12/119 -- train_loss: 1.3341 
2025-08-11 20:12:01,740 - INFO - Epoch: 36/500, Iter: 13/119 -- train_loss: 1.3564 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:14:57,696 - INFO - Epoch: 37/500, Iter: 1/119 -- train_loss: 1.4626 


[1/119]   1%|           [00:00<?]

2025-08-11 20:14:57,930 - INFO - Epoch: 37/500, Iter: 2/119 -- train_loss: 1.3096 
2025-08-11 20:14:58,147 - INFO - Epoch: 37/500, Iter: 3/119 -- train_loss: 1.5482 
2025-08-11 20:14:58,385 - INFO - Epoch: 37/500, Iter: 4/119 -- train_loss: 1.3972 
2025-08-11 20:14:58,608 - INFO - Epoch: 37/500, Iter: 5/119 -- train_loss: 1.3740 
2025-08-11 20:14:58,857 - INFO - Epoch: 37/500, Iter: 6/119 -- train_loss: 1.3450 
2025-08-11 20:14:59,083 - INFO - Epoch: 37/500, Iter: 7/119 -- train_loss: 1.3332 
2025-08-11 20:14:59,307 - INFO - Epoch: 37/500, Iter: 8/119 -- train_loss: 1.3233 
2025-08-11 20:15:00,330 - INFO - Epoch: 37/500, Iter: 9/119 -- train_loss: 1.2280 
2025-08-11 20:15:00,564 - INFO - Epoch: 37/500, Iter: 10/119 -- train_loss: 1.2474 
2025-08-11 20:15:00,840 - INFO - Epoch: 37/500, Iter: 11/119 -- train_loss: 1.3962 
2025-08-11 20:15:01,111 - INFO - Epoch: 37/500, Iter: 12/119 -- train_loss: 1.3789 
2025-08-11 20:15:01,364 - INFO - Epoch: 37/500, Iter: 13/119 -- train_loss: 1.3365 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:17:47,912 - INFO - Epoch: 38/500, Iter: 1/119 -- train_loss: 1.3856 


[1/119]   1%|           [00:00<?]

2025-08-11 20:17:48,159 - INFO - Epoch: 38/500, Iter: 2/119 -- train_loss: 1.3100 
2025-08-11 20:17:48,403 - INFO - Epoch: 38/500, Iter: 3/119 -- train_loss: 1.2729 
2025-08-11 20:17:48,608 - INFO - Epoch: 38/500, Iter: 4/119 -- train_loss: 1.3699 
2025-08-11 20:17:48,845 - INFO - Epoch: 38/500, Iter: 5/119 -- train_loss: 1.3982 
2025-08-11 20:17:49,113 - INFO - Epoch: 38/500, Iter: 6/119 -- train_loss: 1.2906 
2025-08-11 20:17:49,321 - INFO - Epoch: 38/500, Iter: 7/119 -- train_loss: 1.3377 
2025-08-11 20:17:49,557 - INFO - Epoch: 38/500, Iter: 8/119 -- train_loss: 1.3106 
2025-08-11 20:17:51,618 - INFO - Epoch: 38/500, Iter: 9/119 -- train_loss: 1.2297 
2025-08-11 20:17:51,863 - INFO - Epoch: 38/500, Iter: 10/119 -- train_loss: 1.3901 
2025-08-11 20:17:52,105 - INFO - Epoch: 38/500, Iter: 11/119 -- train_loss: 1.2721 
2025-08-11 20:17:52,340 - INFO - Epoch: 38/500, Iter: 12/119 -- train_loss: 1.3373 
2025-08-11 20:17:52,630 - INFO - Epoch: 38/500, Iter: 13/119 -- train_loss: 1.3585 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:20:41,638 - INFO - Epoch: 39/500, Iter: 1/119 -- train_loss: 1.4279 


[1/119]   1%|           [00:00<?]

2025-08-11 20:20:41,865 - INFO - Epoch: 39/500, Iter: 2/119 -- train_loss: 1.3003 
2025-08-11 20:20:42,131 - INFO - Epoch: 39/500, Iter: 3/119 -- train_loss: 1.3957 
2025-08-11 20:20:42,362 - INFO - Epoch: 39/500, Iter: 4/119 -- train_loss: 1.4234 
2025-08-11 20:20:42,637 - INFO - Epoch: 39/500, Iter: 5/119 -- train_loss: 1.3205 
2025-08-11 20:20:44,788 - INFO - Epoch: 39/500, Iter: 6/119 -- train_loss: 1.3679 
2025-08-11 20:20:45,058 - INFO - Epoch: 39/500, Iter: 7/119 -- train_loss: 1.4620 
2025-08-11 20:20:45,273 - INFO - Epoch: 39/500, Iter: 8/119 -- train_loss: 1.3366 
2025-08-11 20:20:45,533 - INFO - Epoch: 39/500, Iter: 9/119 -- train_loss: 1.2408 
2025-08-11 20:20:45,750 - INFO - Epoch: 39/500, Iter: 10/119 -- train_loss: 1.3726 
2025-08-11 20:20:45,996 - INFO - Epoch: 39/500, Iter: 11/119 -- train_loss: 1.2759 
2025-08-11 20:20:46,267 - INFO - Epoch: 39/500, Iter: 12/119 -- train_loss: 1.3785 
2025-08-11 20:20:46,534 - INFO - Epoch: 39/500, Iter: 13/119 -- train_loss: 1.3336 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:23:37,992 - INFO - Epoch: 40/500, Iter: 1/119 -- train_loss: 1.2908 


[1/119]   1%|           [00:00<?]

2025-08-11 20:23:39,066 - INFO - Epoch: 40/500, Iter: 2/119 -- train_loss: 1.3281 
2025-08-11 20:23:39,291 - INFO - Epoch: 40/500, Iter: 3/119 -- train_loss: 1.3113 
2025-08-11 20:23:39,549 - INFO - Epoch: 40/500, Iter: 4/119 -- train_loss: 1.3199 
2025-08-11 20:23:39,773 - INFO - Epoch: 40/500, Iter: 5/119 -- train_loss: 1.4457 
2025-08-11 20:23:40,030 - INFO - Epoch: 40/500, Iter: 6/119 -- train_loss: 1.3905 
2025-08-11 20:23:40,290 - INFO - Epoch: 40/500, Iter: 7/119 -- train_loss: 1.4468 
2025-08-11 20:23:40,522 - INFO - Epoch: 40/500, Iter: 8/119 -- train_loss: 1.2775 
2025-08-11 20:23:40,784 - INFO - Epoch: 40/500, Iter: 9/119 -- train_loss: 1.3938 
2025-08-11 20:23:42,240 - INFO - Epoch: 40/500, Iter: 10/119 -- train_loss: 1.4054 
2025-08-11 20:23:42,497 - INFO - Epoch: 40/500, Iter: 11/119 -- train_loss: 1.3233 
2025-08-11 20:23:42,748 - INFO - Epoch: 40/500, Iter: 12/119 -- train_loss: 1.3574 
2025-08-11 20:23:42,986 - INFO - Epoch: 40/500, Iter: 13/119 -- train_loss: 1.3983 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:26:39,165 - INFO - Epoch: 41/500, Iter: 1/119 -- train_loss: 1.3953 


[1/119]   1%|           [00:00<?]

2025-08-11 20:26:39,399 - INFO - Epoch: 41/500, Iter: 2/119 -- train_loss: 1.3764 
2025-08-11 20:26:39,651 - INFO - Epoch: 41/500, Iter: 3/119 -- train_loss: 1.2613 
2025-08-11 20:26:39,890 - INFO - Epoch: 41/500, Iter: 4/119 -- train_loss: 1.4372 
2025-08-11 20:26:40,125 - INFO - Epoch: 41/500, Iter: 5/119 -- train_loss: 1.4656 
2025-08-11 20:26:40,335 - INFO - Epoch: 41/500, Iter: 6/119 -- train_loss: 1.3956 
2025-08-11 20:26:40,585 - INFO - Epoch: 41/500, Iter: 7/119 -- train_loss: 1.4356 
2025-08-11 20:26:40,842 - INFO - Epoch: 41/500, Iter: 8/119 -- train_loss: 1.4169 
2025-08-11 20:26:41,741 - INFO - Epoch: 41/500, Iter: 9/119 -- train_loss: 1.3706 
2025-08-11 20:26:42,000 - INFO - Epoch: 41/500, Iter: 10/119 -- train_loss: 1.4196 
2025-08-11 20:26:42,273 - INFO - Epoch: 41/500, Iter: 11/119 -- train_loss: 1.5060 
2025-08-11 20:26:46,031 - INFO - Epoch: 41/500, Iter: 12/119 -- train_loss: 1.7734 
2025-08-11 20:26:46,271 - INFO - Epoch: 41/500, Iter: 13/119 -- train_loss: 1.3611 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:29:33,290 - INFO - Epoch: 42/500, Iter: 1/119 -- train_loss: 1.3304 


[1/119]   1%|           [00:00<?]

2025-08-11 20:29:33,539 - INFO - Epoch: 42/500, Iter: 2/119 -- train_loss: 1.3792 
2025-08-11 20:29:33,772 - INFO - Epoch: 42/500, Iter: 3/119 -- train_loss: 1.3124 
2025-08-11 20:29:34,007 - INFO - Epoch: 42/500, Iter: 4/119 -- train_loss: 1.3941 
2025-08-11 20:29:34,251 - INFO - Epoch: 42/500, Iter: 5/119 -- train_loss: 1.2997 
2025-08-11 20:29:34,490 - INFO - Epoch: 42/500, Iter: 6/119 -- train_loss: 1.2963 
2025-08-11 20:29:34,744 - INFO - Epoch: 42/500, Iter: 7/119 -- train_loss: 1.3870 
2025-08-11 20:29:34,963 - INFO - Epoch: 42/500, Iter: 8/119 -- train_loss: 1.3898 
2025-08-11 20:29:35,700 - INFO - Epoch: 42/500, Iter: 9/119 -- train_loss: 1.3469 
2025-08-11 20:29:35,950 - INFO - Epoch: 42/500, Iter: 10/119 -- train_loss: 1.3347 
2025-08-11 20:29:36,227 - INFO - Epoch: 42/500, Iter: 11/119 -- train_loss: 1.3052 
2025-08-11 20:29:36,474 - INFO - Epoch: 42/500, Iter: 12/119 -- train_loss: 1.5505 
2025-08-11 20:29:36,720 - INFO - Epoch: 42/500, Iter: 13/119 -- train_loss: 1.4038 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:32:32,199 - INFO - Epoch: 43/500, Iter: 1/119 -- train_loss: 1.5832 


[1/119]   1%|           [00:00<?]

2025-08-11 20:32:35,019 - INFO - Epoch: 43/500, Iter: 2/119 -- train_loss: 1.3975 
2025-08-11 20:32:35,248 - INFO - Epoch: 43/500, Iter: 3/119 -- train_loss: 1.3067 
2025-08-11 20:32:35,507 - INFO - Epoch: 43/500, Iter: 4/119 -- train_loss: 1.2791 
2025-08-11 20:32:35,747 - INFO - Epoch: 43/500, Iter: 5/119 -- train_loss: 1.2781 
2025-08-11 20:32:36,006 - INFO - Epoch: 43/500, Iter: 6/119 -- train_loss: 1.3989 
2025-08-11 20:32:36,240 - INFO - Epoch: 43/500, Iter: 7/119 -- train_loss: 1.4057 
2025-08-11 20:32:36,499 - INFO - Epoch: 43/500, Iter: 8/119 -- train_loss: 1.2713 
2025-08-11 20:32:36,803 - INFO - Epoch: 43/500, Iter: 9/119 -- train_loss: 1.4205 
2025-08-11 20:32:39,831 - INFO - Epoch: 43/500, Iter: 10/119 -- train_loss: 1.3634 
2025-08-11 20:32:40,692 - INFO - Epoch: 43/500, Iter: 11/119 -- train_loss: 1.3781 
2025-08-11 20:32:40,918 - INFO - Epoch: 43/500, Iter: 12/119 -- train_loss: 1.3331 
2025-08-11 20:32:41,132 - INFO - Epoch: 43/500, Iter: 13/119 -- train_loss: 1.3088 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:35:23,341 - INFO - Epoch: 44/500, Iter: 1/119 -- train_loss: 1.3737 


[1/119]   1%|           [00:00<?]

2025-08-11 20:35:23,575 - INFO - Epoch: 44/500, Iter: 2/119 -- train_loss: 1.3225 
2025-08-11 20:35:23,835 - INFO - Epoch: 44/500, Iter: 3/119 -- train_loss: 1.2789 
2025-08-11 20:35:27,701 - INFO - Epoch: 44/500, Iter: 4/119 -- train_loss: 1.6153 
2025-08-11 20:35:27,928 - INFO - Epoch: 44/500, Iter: 5/119 -- train_loss: 1.3109 
2025-08-11 20:35:28,153 - INFO - Epoch: 44/500, Iter: 6/119 -- train_loss: 1.4112 
2025-08-11 20:35:28,389 - INFO - Epoch: 44/500, Iter: 7/119 -- train_loss: 1.3951 
2025-08-11 20:35:28,608 - INFO - Epoch: 44/500, Iter: 8/119 -- train_loss: 1.4606 
2025-08-11 20:35:28,834 - INFO - Epoch: 44/500, Iter: 9/119 -- train_loss: 1.3720 
2025-08-11 20:35:29,091 - INFO - Epoch: 44/500, Iter: 10/119 -- train_loss: 1.2776 
2025-08-11 20:35:29,350 - INFO - Epoch: 44/500, Iter: 11/119 -- train_loss: 1.4085 
2025-08-11 20:35:29,972 - INFO - Epoch: 44/500, Iter: 12/119 -- train_loss: 1.4098 
2025-08-11 20:35:30,241 - INFO - Epoch: 44/500, Iter: 13/119 -- train_loss: 1.4376 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:38:10,554 - INFO - Epoch: 45/500, Iter: 1/119 -- train_loss: 1.3717 


[1/119]   1%|           [00:00<?]

2025-08-11 20:38:11,591 - INFO - Epoch: 45/500, Iter: 2/119 -- train_loss: 1.3898 
2025-08-11 20:38:11,834 - INFO - Epoch: 45/500, Iter: 3/119 -- train_loss: 1.3574 
2025-08-11 20:38:12,412 - INFO - Epoch: 45/500, Iter: 4/119 -- train_loss: 1.4483 
2025-08-11 20:38:12,666 - INFO - Epoch: 45/500, Iter: 5/119 -- train_loss: 1.4131 
2025-08-11 20:38:12,907 - INFO - Epoch: 45/500, Iter: 6/119 -- train_loss: 1.3800 
2025-08-11 20:38:13,161 - INFO - Epoch: 45/500, Iter: 7/119 -- train_loss: 1.3404 
2025-08-11 20:38:13,430 - INFO - Epoch: 45/500, Iter: 8/119 -- train_loss: 1.3751 
2025-08-11 20:38:14,149 - INFO - Epoch: 45/500, Iter: 9/119 -- train_loss: 1.3291 
2025-08-11 20:38:14,397 - INFO - Epoch: 45/500, Iter: 10/119 -- train_loss: 1.3998 
2025-08-11 20:38:14,908 - INFO - Epoch: 45/500, Iter: 11/119 -- train_loss: 1.3177 
2025-08-11 20:38:15,921 - INFO - Epoch: 45/500, Iter: 12/119 -- train_loss: 1.4050 
2025-08-11 20:38:16,148 - INFO - Epoch: 45/500, Iter: 13/119 -- train_loss: 1.4489 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:41:03,048 - INFO - Epoch: 46/500, Iter: 1/119 -- train_loss: 1.3138 


[1/119]   1%|           [00:00<?]

2025-08-11 20:41:03,292 - INFO - Epoch: 46/500, Iter: 2/119 -- train_loss: 1.3667 
2025-08-11 20:41:03,550 - INFO - Epoch: 46/500, Iter: 3/119 -- train_loss: 1.3349 
2025-08-11 20:41:03,769 - INFO - Epoch: 46/500, Iter: 4/119 -- train_loss: 1.4035 
2025-08-11 20:41:03,984 - INFO - Epoch: 46/500, Iter: 5/119 -- train_loss: 1.6469 
2025-08-11 20:41:04,221 - INFO - Epoch: 46/500, Iter: 6/119 -- train_loss: 1.3293 
2025-08-11 20:41:04,459 - INFO - Epoch: 46/500, Iter: 7/119 -- train_loss: 1.3681 
2025-08-11 20:41:04,726 - INFO - Epoch: 46/500, Iter: 8/119 -- train_loss: 1.4487 
2025-08-11 20:41:06,126 - INFO - Epoch: 46/500, Iter: 9/119 -- train_loss: 1.3129 
2025-08-11 20:41:06,368 - INFO - Epoch: 46/500, Iter: 10/119 -- train_loss: 1.4075 
2025-08-11 20:41:06,595 - INFO - Epoch: 46/500, Iter: 11/119 -- train_loss: 1.2563 
2025-08-11 20:41:06,844 - INFO - Epoch: 46/500, Iter: 12/119 -- train_loss: 1.4147 
2025-08-11 20:41:07,068 - INFO - Epoch: 46/500, Iter: 13/119 -- train_loss: 1.2699 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:43:53,218 - INFO - Epoch: 47/500, Iter: 1/119 -- train_loss: 1.2713 


[1/119]   1%|           [00:00<?]

2025-08-11 20:43:57,385 - INFO - Epoch: 47/500, Iter: 2/119 -- train_loss: 1.3350 
2025-08-11 20:43:57,622 - INFO - Epoch: 47/500, Iter: 3/119 -- train_loss: 1.2555 
2025-08-11 20:43:57,880 - INFO - Epoch: 47/500, Iter: 4/119 -- train_loss: 1.2608 
2025-08-11 20:43:58,116 - INFO - Epoch: 47/500, Iter: 5/119 -- train_loss: 1.5227 
2025-08-11 20:43:58,358 - INFO - Epoch: 47/500, Iter: 6/119 -- train_loss: 1.3201 
2025-08-11 20:43:58,620 - INFO - Epoch: 47/500, Iter: 7/119 -- train_loss: 1.4930 
2025-08-11 20:43:58,871 - INFO - Epoch: 47/500, Iter: 8/119 -- train_loss: 1.3357 
2025-08-11 20:43:59,133 - INFO - Epoch: 47/500, Iter: 9/119 -- train_loss: 1.3134 
2025-08-11 20:44:02,681 - INFO - Epoch: 47/500, Iter: 10/119 -- train_loss: 1.3555 
2025-08-11 20:44:02,915 - INFO - Epoch: 47/500, Iter: 11/119 -- train_loss: 1.4206 
2025-08-11 20:44:03,157 - INFO - Epoch: 47/500, Iter: 12/119 -- train_loss: 1.4053 
2025-08-11 20:44:03,415 - INFO - Epoch: 47/500, Iter: 13/119 -- train_loss: 1.3155 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:46:44,540 - INFO - Epoch: 48/500, Iter: 1/119 -- train_loss: 1.3284 


[1/119]   1%|           [00:00<?]

2025-08-11 20:46:44,798 - INFO - Epoch: 48/500, Iter: 2/119 -- train_loss: 1.3961 
2025-08-11 20:46:45,064 - INFO - Epoch: 48/500, Iter: 3/119 -- train_loss: 1.3725 
2025-08-11 20:46:45,288 - INFO - Epoch: 48/500, Iter: 4/119 -- train_loss: 1.2776 
2025-08-11 20:46:46,580 - INFO - Epoch: 48/500, Iter: 5/119 -- train_loss: 1.4331 
2025-08-11 20:46:47,590 - INFO - Epoch: 48/500, Iter: 6/119 -- train_loss: 1.3277 
2025-08-11 20:46:47,824 - INFO - Epoch: 48/500, Iter: 7/119 -- train_loss: 1.3915 
2025-08-11 20:46:48,078 - INFO - Epoch: 48/500, Iter: 8/119 -- train_loss: 1.3123 
2025-08-11 20:46:48,313 - INFO - Epoch: 48/500, Iter: 9/119 -- train_loss: 1.3591 
2025-08-11 20:46:48,582 - INFO - Epoch: 48/500, Iter: 10/119 -- train_loss: 1.3655 
2025-08-11 20:46:48,819 - INFO - Epoch: 48/500, Iter: 11/119 -- train_loss: 1.3031 
2025-08-11 20:46:49,085 - INFO - Epoch: 48/500, Iter: 12/119 -- train_loss: 1.3142 
2025-08-11 20:46:51,076 - INFO - Epoch: 48/500, Iter: 13/119 -- train_loss: 1.2705 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:49:41,890 - INFO - Epoch: 49/500, Iter: 1/119 -- train_loss: 1.3564 


[1/119]   1%|           [00:00<?]

2025-08-11 20:49:42,172 - INFO - Epoch: 49/500, Iter: 2/119 -- train_loss: 1.3274 
2025-08-11 20:49:42,401 - INFO - Epoch: 49/500, Iter: 3/119 -- train_loss: 1.3124 
2025-08-11 20:49:42,640 - INFO - Epoch: 49/500, Iter: 4/119 -- train_loss: 1.3732 
2025-08-11 20:49:42,903 - INFO - Epoch: 49/500, Iter: 5/119 -- train_loss: 1.2777 
2025-08-11 20:49:43,143 - INFO - Epoch: 49/500, Iter: 6/119 -- train_loss: 1.2848 
2025-08-11 20:49:43,395 - INFO - Epoch: 49/500, Iter: 7/119 -- train_loss: 1.3269 
2025-08-11 20:49:43,672 - INFO - Epoch: 49/500, Iter: 8/119 -- train_loss: 1.4844 
2025-08-11 20:49:46,789 - INFO - Epoch: 49/500, Iter: 9/119 -- train_loss: 1.3358 
2025-08-11 20:49:47,037 - INFO - Epoch: 49/500, Iter: 10/119 -- train_loss: 1.2940 
2025-08-11 20:49:48,916 - INFO - Epoch: 49/500, Iter: 11/119 -- train_loss: 1.4696 
2025-08-11 20:49:49,109 - INFO - Epoch: 49/500, Iter: 12/119 -- train_loss: 1.4097 
2025-08-11 20:49:49,335 - INFO - Epoch: 49/500, Iter: 13/119 -- train_loss: 1.2750 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:52:36,044 - INFO - Epoch: 50/500, Iter: 1/119 -- train_loss: 1.3619 


[1/119]   1%|           [00:00<?]

2025-08-11 20:52:36,300 - INFO - Epoch: 50/500, Iter: 2/119 -- train_loss: 1.3169 
2025-08-11 20:52:36,590 - INFO - Epoch: 50/500, Iter: 3/119 -- train_loss: 1.3435 
2025-08-11 20:52:38,320 - INFO - Epoch: 50/500, Iter: 4/119 -- train_loss: 1.4227 
2025-08-11 20:52:38,537 - INFO - Epoch: 50/500, Iter: 5/119 -- train_loss: 1.3820 
2025-08-11 20:52:38,761 - INFO - Epoch: 50/500, Iter: 6/119 -- train_loss: 1.4397 
2025-08-11 20:52:39,000 - INFO - Epoch: 50/500, Iter: 7/119 -- train_loss: 1.3575 
2025-08-11 20:52:39,254 - INFO - Epoch: 50/500, Iter: 8/119 -- train_loss: 1.4253 
2025-08-11 20:52:39,494 - INFO - Epoch: 50/500, Iter: 9/119 -- train_loss: 1.3465 
2025-08-11 20:52:39,728 - INFO - Epoch: 50/500, Iter: 10/119 -- train_loss: 1.3290 
2025-08-11 20:52:39,961 - INFO - Epoch: 50/500, Iter: 11/119 -- train_loss: 1.3563 
2025-08-11 20:52:40,827 - INFO - Epoch: 50/500, Iter: 12/119 -- train_loss: 1.2772 
2025-08-11 20:52:41,060 - INFO - Epoch: 50/500, Iter: 13/119 -- train_loss: 1.2794 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:55:30,248 - INFO - Epoch: 51/500, Iter: 1/119 -- train_loss: 1.3089 


[1/119]   1%|           [00:00<?]

2025-08-11 20:55:31,608 - INFO - Epoch: 51/500, Iter: 2/119 -- train_loss: 1.3868 
2025-08-11 20:55:31,824 - INFO - Epoch: 51/500, Iter: 3/119 -- train_loss: 1.3432 
2025-08-11 20:55:32,092 - INFO - Epoch: 51/500, Iter: 4/119 -- train_loss: 1.3007 
2025-08-11 20:55:33,069 - INFO - Epoch: 51/500, Iter: 5/119 -- train_loss: 1.3555 
2025-08-11 20:55:33,287 - INFO - Epoch: 51/500, Iter: 6/119 -- train_loss: 1.3549 
2025-08-11 20:55:33,524 - INFO - Epoch: 51/500, Iter: 7/119 -- train_loss: 1.4179 
2025-08-11 20:55:33,741 - INFO - Epoch: 51/500, Iter: 8/119 -- train_loss: 1.2989 
2025-08-11 20:55:33,960 - INFO - Epoch: 51/500, Iter: 9/119 -- train_loss: 1.4860 
2025-08-11 20:55:35,277 - INFO - Epoch: 51/500, Iter: 10/119 -- train_loss: 1.3400 
2025-08-11 20:55:35,522 - INFO - Epoch: 51/500, Iter: 11/119 -- train_loss: 1.3282 
2025-08-11 20:55:35,781 - INFO - Epoch: 51/500, Iter: 12/119 -- train_loss: 1.3486 
2025-08-11 20:55:36,039 - INFO - Epoch: 51/500, Iter: 13/119 -- train_loss: 1.3862 


[1/20]   5%|5          [00:00<?]

2025-08-11 20:58:19,146 - INFO - Epoch: 52/500, Iter: 1/119 -- train_loss: 1.2683 


[1/119]   1%|           [00:00<?]

2025-08-11 20:58:19,499 - INFO - Epoch: 52/500, Iter: 2/119 -- train_loss: 1.2878 
2025-08-11 20:58:21,379 - INFO - Epoch: 52/500, Iter: 3/119 -- train_loss: 1.3237 
2025-08-11 20:58:21,628 - INFO - Epoch: 52/500, Iter: 4/119 -- train_loss: 1.2424 
2025-08-11 20:58:21,857 - INFO - Epoch: 52/500, Iter: 5/119 -- train_loss: 1.3682 
2025-08-11 20:58:22,115 - INFO - Epoch: 52/500, Iter: 6/119 -- train_loss: 1.3096 
2025-08-11 20:58:22,382 - INFO - Epoch: 52/500, Iter: 7/119 -- train_loss: 1.2947 
2025-08-11 20:58:22,617 - INFO - Epoch: 52/500, Iter: 8/119 -- train_loss: 1.3152 
2025-08-11 20:58:22,875 - INFO - Epoch: 52/500, Iter: 9/119 -- train_loss: 1.3979 
2025-08-11 20:58:24,513 - INFO - Epoch: 52/500, Iter: 10/119 -- train_loss: 1.3778 
2025-08-11 20:58:25,309 - INFO - Epoch: 52/500, Iter: 11/119 -- train_loss: 1.3477 
2025-08-11 20:58:25,540 - INFO - Epoch: 52/500, Iter: 12/119 -- train_loss: 1.3108 
2025-08-11 20:58:25,772 - INFO - Epoch: 52/500, Iter: 13/119 -- train_loss: 1.4211 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:01:10,336 - INFO - Epoch: 53/500, Iter: 1/119 -- train_loss: 1.3236 


[1/119]   1%|           [00:00<?]

2025-08-11 21:01:10,538 - INFO - Epoch: 53/500, Iter: 2/119 -- train_loss: 1.3259 
2025-08-11 21:01:10,771 - INFO - Epoch: 53/500, Iter: 3/119 -- train_loss: 1.3651 
2025-08-11 21:01:11,015 - INFO - Epoch: 53/500, Iter: 4/119 -- train_loss: 1.4368 
2025-08-11 21:01:11,242 - INFO - Epoch: 53/500, Iter: 5/119 -- train_loss: 1.3650 
2025-08-11 21:01:11,484 - INFO - Epoch: 53/500, Iter: 6/119 -- train_loss: 1.2506 
2025-08-11 21:01:11,738 - INFO - Epoch: 53/500, Iter: 7/119 -- train_loss: 1.3589 
2025-08-11 21:01:12,008 - INFO - Epoch: 53/500, Iter: 8/119 -- train_loss: 1.2865 
2025-08-11 21:01:13,391 - INFO - Epoch: 53/500, Iter: 9/119 -- train_loss: 1.4248 
2025-08-11 21:01:13,645 - INFO - Epoch: 53/500, Iter: 10/119 -- train_loss: 1.4029 
2025-08-11 21:01:13,873 - INFO - Epoch: 53/500, Iter: 11/119 -- train_loss: 1.2705 
2025-08-11 21:01:14,136 - INFO - Epoch: 53/500, Iter: 12/119 -- train_loss: 1.4004 
2025-08-11 21:01:14,369 - INFO - Epoch: 53/500, Iter: 13/119 -- train_loss: 1.4343 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:04:00,469 - INFO - Epoch: 54/500, Iter: 1/119 -- train_loss: 1.3676 


[1/119]   1%|           [00:00<?]

2025-08-11 21:04:00,713 - INFO - Epoch: 54/500, Iter: 2/119 -- train_loss: 1.3292 
2025-08-11 21:04:00,930 - INFO - Epoch: 54/500, Iter: 3/119 -- train_loss: 1.3728 
2025-08-11 21:04:01,164 - INFO - Epoch: 54/500, Iter: 4/119 -- train_loss: 1.3589 
2025-08-11 21:04:01,384 - INFO - Epoch: 54/500, Iter: 5/119 -- train_loss: 1.4618 
2025-08-11 21:04:01,613 - INFO - Epoch: 54/500, Iter: 6/119 -- train_loss: 1.4724 
2025-08-11 21:04:01,875 - INFO - Epoch: 54/500, Iter: 7/119 -- train_loss: 1.3975 
2025-08-11 21:04:02,134 - INFO - Epoch: 54/500, Iter: 8/119 -- train_loss: 1.2192 
2025-08-11 21:04:03,918 - INFO - Epoch: 54/500, Iter: 9/119 -- train_loss: 1.3565 
2025-08-11 21:04:04,193 - INFO - Epoch: 54/500, Iter: 10/119 -- train_loss: 1.3684 
2025-08-11 21:04:04,430 - INFO - Epoch: 54/500, Iter: 11/119 -- train_loss: 1.3665 
2025-08-11 21:04:04,668 - INFO - Epoch: 54/500, Iter: 12/119 -- train_loss: 1.3777 
2025-08-11 21:04:04,913 - INFO - Epoch: 54/500, Iter: 13/119 -- train_loss: 1.3016 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:06:53,008 - INFO - Epoch: 55/500, Iter: 1/119 -- train_loss: 1.3800 


[1/119]   1%|           [00:00<?]

2025-08-11 21:06:53,240 - INFO - Epoch: 55/500, Iter: 2/119 -- train_loss: 1.3811 
2025-08-11 21:06:53,490 - INFO - Epoch: 55/500, Iter: 3/119 -- train_loss: 1.4310 
2025-08-11 21:06:53,722 - INFO - Epoch: 55/500, Iter: 4/119 -- train_loss: 1.3496 
2025-08-11 21:06:53,974 - INFO - Epoch: 55/500, Iter: 5/119 -- train_loss: 1.4076 
2025-08-11 21:06:54,216 - INFO - Epoch: 55/500, Iter: 6/119 -- train_loss: 1.3242 
2025-08-11 21:06:54,489 - INFO - Epoch: 55/500, Iter: 7/119 -- train_loss: 1.6114 
2025-08-11 21:06:54,747 - INFO - Epoch: 55/500, Iter: 8/119 -- train_loss: 1.3773 
2025-08-11 21:06:55,161 - INFO - Epoch: 55/500, Iter: 9/119 -- train_loss: 1.2746 
2025-08-11 21:06:55,406 - INFO - Epoch: 55/500, Iter: 10/119 -- train_loss: 1.3843 
2025-08-11 21:06:55,668 - INFO - Epoch: 55/500, Iter: 11/119 -- train_loss: 1.3495 
2025-08-11 21:06:55,933 - INFO - Epoch: 55/500, Iter: 12/119 -- train_loss: 1.3167 
2025-08-11 21:06:56,194 - INFO - Epoch: 55/500, Iter: 13/119 -- train_loss: 1.3183 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:09:45,153 - INFO - Epoch: 56/500, Iter: 1/119 -- train_loss: 1.3533 


[1/119]   1%|           [00:00<?]

2025-08-11 21:09:45,404 - INFO - Epoch: 56/500, Iter: 2/119 -- train_loss: 1.3436 
2025-08-11 21:09:45,648 - INFO - Epoch: 56/500, Iter: 3/119 -- train_loss: 1.3258 
2025-08-11 21:09:45,917 - INFO - Epoch: 56/500, Iter: 4/119 -- train_loss: 1.3378 
2025-08-11 21:09:46,169 - INFO - Epoch: 56/500, Iter: 5/119 -- train_loss: 1.3418 
2025-08-11 21:09:46,425 - INFO - Epoch: 56/500, Iter: 6/119 -- train_loss: 1.3635 
2025-08-11 21:09:47,081 - INFO - Epoch: 56/500, Iter: 7/119 -- train_loss: 1.2915 
2025-08-11 21:09:47,310 - INFO - Epoch: 56/500, Iter: 8/119 -- train_loss: 1.3664 
2025-08-11 21:09:47,805 - INFO - Epoch: 56/500, Iter: 9/119 -- train_loss: 1.4356 
2025-08-11 21:09:48,058 - INFO - Epoch: 56/500, Iter: 10/119 -- train_loss: 1.3436 
2025-08-11 21:09:48,302 - INFO - Epoch: 56/500, Iter: 11/119 -- train_loss: 1.2845 
2025-08-11 21:09:49,230 - INFO - Epoch: 56/500, Iter: 12/119 -- train_loss: 1.3715 
2025-08-11 21:09:49,488 - INFO - Epoch: 56/500, Iter: 13/119 -- train_loss: 1.3714 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:12:33,290 - INFO - Epoch: 57/500, Iter: 1/119 -- train_loss: 1.2320 


[1/119]   1%|           [00:00<?]

2025-08-11 21:12:33,892 - INFO - Epoch: 57/500, Iter: 2/119 -- train_loss: 1.3545 
2025-08-11 21:12:34,146 - INFO - Epoch: 57/500, Iter: 3/119 -- train_loss: 1.3338 
2025-08-11 21:12:34,394 - INFO - Epoch: 57/500, Iter: 4/119 -- train_loss: 1.3408 
2025-08-11 21:12:34,634 - INFO - Epoch: 57/500, Iter: 5/119 -- train_loss: 1.3034 
2025-08-11 21:12:34,979 - INFO - Epoch: 57/500, Iter: 6/119 -- train_loss: 1.3364 
2025-08-11 21:12:35,227 - INFO - Epoch: 57/500, Iter: 7/119 -- train_loss: 1.3819 
2025-08-11 21:12:35,474 - INFO - Epoch: 57/500, Iter: 8/119 -- train_loss: 1.5021 
2025-08-11 21:12:37,317 - INFO - Epoch: 57/500, Iter: 9/119 -- train_loss: 1.3648 
2025-08-11 21:12:38,012 - INFO - Epoch: 57/500, Iter: 10/119 -- train_loss: 1.4306 
2025-08-11 21:12:38,269 - INFO - Epoch: 57/500, Iter: 11/119 -- train_loss: 1.2985 
2025-08-11 21:12:38,519 - INFO - Epoch: 57/500, Iter: 12/119 -- train_loss: 1.4240 
2025-08-11 21:12:41,100 - INFO - Epoch: 57/500, Iter: 13/119 -- train_loss: 1.4150 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:15:25,686 - INFO - Epoch: 58/500, Iter: 1/119 -- train_loss: 1.3238 


[1/119]   1%|           [00:00<?]

2025-08-11 21:15:28,440 - INFO - Epoch: 58/500, Iter: 2/119 -- train_loss: 1.4292 
2025-08-11 21:15:28,647 - INFO - Epoch: 58/500, Iter: 3/119 -- train_loss: 1.3381 
2025-08-11 21:15:28,903 - INFO - Epoch: 58/500, Iter: 4/119 -- train_loss: 1.3565 
2025-08-11 21:15:29,143 - INFO - Epoch: 58/500, Iter: 5/119 -- train_loss: 1.3144 
2025-08-11 21:15:29,378 - INFO - Epoch: 58/500, Iter: 6/119 -- train_loss: 1.4712 
2025-08-11 21:15:29,594 - INFO - Epoch: 58/500, Iter: 7/119 -- train_loss: 1.2816 
2025-08-11 21:15:29,837 - INFO - Epoch: 58/500, Iter: 8/119 -- train_loss: 1.3508 
2025-08-11 21:15:30,103 - INFO - Epoch: 58/500, Iter: 9/119 -- train_loss: 1.3952 
2025-08-11 21:15:31,916 - INFO - Epoch: 58/500, Iter: 10/119 -- train_loss: 1.3312 
2025-08-11 21:15:32,153 - INFO - Epoch: 58/500, Iter: 11/119 -- train_loss: 1.3457 
2025-08-11 21:15:32,404 - INFO - Epoch: 58/500, Iter: 12/119 -- train_loss: 1.2855 
2025-08-11 21:15:32,671 - INFO - Epoch: 58/500, Iter: 13/119 -- train_loss: 1.3118 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:18:15,592 - INFO - Epoch: 59/500, Iter: 1/119 -- train_loss: 1.4485 


[1/119]   1%|           [00:00<?]

2025-08-11 21:18:15,864 - INFO - Epoch: 59/500, Iter: 2/119 -- train_loss: 1.3612 
2025-08-11 21:18:16,132 - INFO - Epoch: 59/500, Iter: 3/119 -- train_loss: 1.3829 
2025-08-11 21:18:17,020 - INFO - Epoch: 59/500, Iter: 4/119 -- train_loss: 1.3315 
2025-08-11 21:18:17,258 - INFO - Epoch: 59/500, Iter: 5/119 -- train_loss: 1.3639 
2025-08-11 21:18:17,509 - INFO - Epoch: 59/500, Iter: 6/119 -- train_loss: 1.4340 
2025-08-11 21:18:17,725 - INFO - Epoch: 59/500, Iter: 7/119 -- train_loss: 1.3807 
2025-08-11 21:18:18,075 - INFO - Epoch: 59/500, Iter: 8/119 -- train_loss: 1.4762 
2025-08-11 21:18:21,108 - INFO - Epoch: 59/500, Iter: 9/119 -- train_loss: 1.2965 
2025-08-11 21:18:21,345 - INFO - Epoch: 59/500, Iter: 10/119 -- train_loss: 1.2878 
2025-08-11 21:18:21,571 - INFO - Epoch: 59/500, Iter: 11/119 -- train_loss: 1.2150 
2025-08-11 21:18:21,802 - INFO - Epoch: 59/500, Iter: 12/119 -- train_loss: 1.3611 
2025-08-11 21:18:22,042 - INFO - Epoch: 59/500, Iter: 13/119 -- train_loss: 1.3374 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:21:12,517 - INFO - Epoch: 60/500, Iter: 1/119 -- train_loss: 1.4995 


[1/119]   1%|           [00:00<?]

2025-08-11 21:21:12,769 - INFO - Epoch: 60/500, Iter: 2/119 -- train_loss: 1.5695 
2025-08-11 21:21:14,429 - INFO - Epoch: 60/500, Iter: 3/119 -- train_loss: 1.3718 
2025-08-11 21:21:14,666 - INFO - Epoch: 60/500, Iter: 4/119 -- train_loss: 1.3585 
2025-08-11 21:21:14,932 - INFO - Epoch: 60/500, Iter: 5/119 -- train_loss: 1.3599 
2025-08-11 21:21:15,171 - INFO - Epoch: 60/500, Iter: 6/119 -- train_loss: 1.3481 
2025-08-11 21:21:15,415 - INFO - Epoch: 60/500, Iter: 7/119 -- train_loss: 1.4685 
2025-08-11 21:21:15,670 - INFO - Epoch: 60/500, Iter: 8/119 -- train_loss: 1.2726 
2025-08-11 21:21:16,644 - INFO - Epoch: 60/500, Iter: 9/119 -- train_loss: 1.3433 
2025-08-11 21:21:16,895 - INFO - Epoch: 60/500, Iter: 10/119 -- train_loss: 1.3437 
2025-08-11 21:21:18,035 - INFO - Epoch: 60/500, Iter: 11/119 -- train_loss: 1.3643 
2025-08-11 21:21:18,284 - INFO - Epoch: 60/500, Iter: 12/119 -- train_loss: 1.4264 
2025-08-11 21:21:18,535 - INFO - Epoch: 60/500, Iter: 13/119 -- train_loss: 1.2892 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:24:00,128 - INFO - Epoch: 61/500, Iter: 1/119 -- train_loss: 1.3905 


[1/119]   1%|           [00:00<?]

2025-08-11 21:24:00,382 - INFO - Epoch: 61/500, Iter: 2/119 -- train_loss: 1.2928 
2025-08-11 21:24:00,645 - INFO - Epoch: 61/500, Iter: 3/119 -- train_loss: 1.2918 
2025-08-11 21:24:00,883 - INFO - Epoch: 61/500, Iter: 4/119 -- train_loss: 1.3384 
2025-08-11 21:24:01,182 - INFO - Epoch: 61/500, Iter: 5/119 -- train_loss: 1.2294 
2025-08-11 21:24:01,449 - INFO - Epoch: 61/500, Iter: 6/119 -- train_loss: 1.3408 
2025-08-11 21:24:01,687 - INFO - Epoch: 61/500, Iter: 7/119 -- train_loss: 1.2977 
2025-08-11 21:24:01,945 - INFO - Epoch: 61/500, Iter: 8/119 -- train_loss: 1.2736 
2025-08-11 21:24:02,404 - INFO - Epoch: 61/500, Iter: 9/119 -- train_loss: 1.3466 
2025-08-11 21:24:02,683 - INFO - Epoch: 61/500, Iter: 10/119 -- train_loss: 1.2871 
2025-08-11 21:24:02,931 - INFO - Epoch: 61/500, Iter: 11/119 -- train_loss: 1.4595 
2025-08-11 21:24:03,186 - INFO - Epoch: 61/500, Iter: 12/119 -- train_loss: 1.3576 
2025-08-11 21:24:03,465 - INFO - Epoch: 61/500, Iter: 13/119 -- train_loss: 1.3323 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:26:44,669 - INFO - Epoch: 62/500, Iter: 1/119 -- train_loss: 1.3294 


[1/119]   1%|           [00:00<?]

2025-08-11 21:26:45,391 - INFO - Epoch: 62/500, Iter: 2/119 -- train_loss: 1.4078 
2025-08-11 21:26:45,645 - INFO - Epoch: 62/500, Iter: 3/119 -- train_loss: 1.2871 
2025-08-11 21:26:45,871 - INFO - Epoch: 62/500, Iter: 4/119 -- train_loss: 1.4068 
2025-08-11 21:26:46,118 - INFO - Epoch: 62/500, Iter: 5/119 -- train_loss: 1.2413 
2025-08-11 21:26:46,325 - INFO - Epoch: 62/500, Iter: 6/119 -- train_loss: 1.3557 
2025-08-11 21:26:46,568 - INFO - Epoch: 62/500, Iter: 7/119 -- train_loss: 1.4937 
2025-08-11 21:26:46,789 - INFO - Epoch: 62/500, Iter: 8/119 -- train_loss: 1.3164 
2025-08-11 21:26:47,457 - INFO - Epoch: 62/500, Iter: 9/119 -- train_loss: 1.3403 
2025-08-11 21:26:49,860 - INFO - Epoch: 62/500, Iter: 10/119 -- train_loss: 1.3019 
2025-08-11 21:26:50,094 - INFO - Epoch: 62/500, Iter: 11/119 -- train_loss: 1.3659 
2025-08-11 21:26:50,312 - INFO - Epoch: 62/500, Iter: 12/119 -- train_loss: 1.4070 
2025-08-11 21:26:50,538 - INFO - Epoch: 62/500, Iter: 13/119 -- train_loss: 1.3585 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:30:07,818 - INFO - Epoch: 63/500, Iter: 1/119 -- train_loss: 1.3226 


[1/119]   1%|           [00:00<?]

2025-08-11 21:30:08,360 - INFO - Epoch: 63/500, Iter: 2/119 -- train_loss: 1.4559 
2025-08-11 21:30:08,615 - INFO - Epoch: 63/500, Iter: 3/119 -- train_loss: 1.2875 
2025-08-11 21:30:09,410 - INFO - Epoch: 63/500, Iter: 4/119 -- train_loss: 1.4134 
2025-08-11 21:30:09,653 - INFO - Epoch: 63/500, Iter: 5/119 -- train_loss: 1.3878 
2025-08-11 21:30:09,893 - INFO - Epoch: 63/500, Iter: 6/119 -- train_loss: 1.3292 
2025-08-11 21:30:10,384 - INFO - Epoch: 63/500, Iter: 7/119 -- train_loss: 1.4273 
2025-08-11 21:30:10,618 - INFO - Epoch: 63/500, Iter: 8/119 -- train_loss: 1.2869 
2025-08-11 21:30:13,017 - INFO - Epoch: 63/500, Iter: 9/119 -- train_loss: 1.3608 
2025-08-11 21:30:13,253 - INFO - Epoch: 63/500, Iter: 10/119 -- train_loss: 1.4494 
2025-08-11 21:30:13,486 - INFO - Epoch: 63/500, Iter: 11/119 -- train_loss: 1.3182 
2025-08-11 21:30:13,731 - INFO - Epoch: 63/500, Iter: 12/119 -- train_loss: 1.2683 
2025-08-11 21:30:13,953 - INFO - Epoch: 63/500, Iter: 13/119 -- train_loss: 1.3320 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:32:59,336 - INFO - Epoch: 64/500, Iter: 1/119 -- train_loss: 1.3817 


[1/119]   1%|           [00:00<?]

2025-08-11 21:32:59,769 - INFO - Epoch: 64/500, Iter: 2/119 -- train_loss: 1.4426 
2025-08-11 21:33:01,347 - INFO - Epoch: 64/500, Iter: 3/119 -- train_loss: 1.3842 
2025-08-11 21:33:01,602 - INFO - Epoch: 64/500, Iter: 4/119 -- train_loss: 1.3293 
2025-08-11 21:33:03,209 - INFO - Epoch: 64/500, Iter: 5/119 -- train_loss: 1.4028 
2025-08-11 21:33:03,418 - INFO - Epoch: 64/500, Iter: 6/119 -- train_loss: 1.3078 
2025-08-11 21:33:03,652 - INFO - Epoch: 64/500, Iter: 7/119 -- train_loss: 1.2561 
2025-08-11 21:33:03,872 - INFO - Epoch: 64/500, Iter: 8/119 -- train_loss: 1.3038 
2025-08-11 21:33:04,102 - INFO - Epoch: 64/500, Iter: 9/119 -- train_loss: 1.3587 
2025-08-11 21:33:04,369 - INFO - Epoch: 64/500, Iter: 10/119 -- train_loss: 1.3017 
2025-08-11 21:33:07,052 - INFO - Epoch: 64/500, Iter: 11/119 -- train_loss: 1.4744 
2025-08-11 21:33:07,269 - INFO - Epoch: 64/500, Iter: 12/119 -- train_loss: 1.3490 
2025-08-11 21:33:07,652 - INFO - Epoch: 64/500, Iter: 13/119 -- train_loss: 1.4387 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:36:03,048 - INFO - Epoch: 65/500, Iter: 1/119 -- train_loss: 1.4003 


[1/119]   1%|           [00:00<?]

2025-08-11 21:36:03,345 - INFO - Epoch: 65/500, Iter: 2/119 -- train_loss: 1.3933 
2025-08-11 21:36:03,578 - INFO - Epoch: 65/500, Iter: 3/119 -- train_loss: 1.3648 
2025-08-11 21:36:03,794 - INFO - Epoch: 65/500, Iter: 4/119 -- train_loss: 1.3450 
2025-08-11 21:36:04,014 - INFO - Epoch: 65/500, Iter: 5/119 -- train_loss: 1.3695 
2025-08-11 21:36:04,243 - INFO - Epoch: 65/500, Iter: 6/119 -- train_loss: 1.3806 
2025-08-11 21:36:04,500 - INFO - Epoch: 65/500, Iter: 7/119 -- train_loss: 1.3612 
2025-08-11 21:36:04,727 - INFO - Epoch: 65/500, Iter: 8/119 -- train_loss: 1.3413 
2025-08-11 21:36:06,822 - INFO - Epoch: 65/500, Iter: 9/119 -- train_loss: 1.3494 
2025-08-11 21:36:07,100 - INFO - Epoch: 65/500, Iter: 10/119 -- train_loss: 1.2819 
2025-08-11 21:36:07,331 - INFO - Epoch: 65/500, Iter: 11/119 -- train_loss: 1.3804 
2025-08-11 21:36:07,591 - INFO - Epoch: 65/500, Iter: 12/119 -- train_loss: 1.3677 
2025-08-11 21:36:07,823 - INFO - Epoch: 65/500, Iter: 13/119 -- train_loss: 1.3499 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:38:46,634 - INFO - Epoch: 66/500, Iter: 1/119 -- train_loss: 1.3584 


[1/119]   1%|           [00:00<?]

2025-08-11 21:38:50,595 - INFO - Epoch: 66/500, Iter: 2/119 -- train_loss: 1.5322 
2025-08-11 21:38:50,858 - INFO - Epoch: 66/500, Iter: 3/119 -- train_loss: 1.2557 
2025-08-11 21:38:51,089 - INFO - Epoch: 66/500, Iter: 4/119 -- train_loss: 1.5104 
2025-08-11 21:38:51,310 - INFO - Epoch: 66/500, Iter: 5/119 -- train_loss: 1.4545 
2025-08-11 21:38:51,540 - INFO - Epoch: 66/500, Iter: 6/119 -- train_loss: 1.3743 
2025-08-11 21:38:51,779 - INFO - Epoch: 66/500, Iter: 7/119 -- train_loss: 1.3534 
2025-08-11 21:38:52,033 - INFO - Epoch: 66/500, Iter: 8/119 -- train_loss: 1.4047 
2025-08-11 21:38:52,308 - INFO - Epoch: 66/500, Iter: 9/119 -- train_loss: 1.4688 
2025-08-11 21:38:52,929 - INFO - Epoch: 66/500, Iter: 10/119 -- train_loss: 1.3854 
2025-08-11 21:38:54,742 - INFO - Epoch: 66/500, Iter: 11/119 -- train_loss: 1.4649 
2025-08-11 21:38:54,986 - INFO - Epoch: 66/500, Iter: 12/119 -- train_loss: 1.3329 
2025-08-11 21:38:55,225 - INFO - Epoch: 66/500, Iter: 13/119 -- train_loss: 1.2463 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:41:42,663 - INFO - Epoch: 67/500, Iter: 1/119 -- train_loss: 1.3119 


[1/119]   1%|           [00:00<?]

2025-08-11 21:41:43,344 - INFO - Epoch: 67/500, Iter: 2/119 -- train_loss: 1.3703 
2025-08-11 21:41:43,612 - INFO - Epoch: 67/500, Iter: 3/119 -- train_loss: 1.4252 
2025-08-11 21:41:43,863 - INFO - Epoch: 67/500, Iter: 4/119 -- train_loss: 1.3209 
2025-08-11 21:41:44,126 - INFO - Epoch: 67/500, Iter: 5/119 -- train_loss: 1.4018 
2025-08-11 21:41:44,386 - INFO - Epoch: 67/500, Iter: 6/119 -- train_loss: 1.3506 
2025-08-11 21:41:44,623 - INFO - Epoch: 67/500, Iter: 7/119 -- train_loss: 1.4442 
2025-08-11 21:41:44,880 - INFO - Epoch: 67/500, Iter: 8/119 -- train_loss: 1.4316 
2025-08-11 21:41:45,108 - INFO - Epoch: 67/500, Iter: 9/119 -- train_loss: 1.3465 
2025-08-11 21:41:47,567 - INFO - Epoch: 67/500, Iter: 10/119 -- train_loss: 1.3261 
2025-08-11 21:41:47,849 - INFO - Epoch: 67/500, Iter: 11/119 -- train_loss: 1.2888 
2025-08-11 21:41:48,082 - INFO - Epoch: 67/500, Iter: 12/119 -- train_loss: 1.4321 
2025-08-11 21:41:48,302 - INFO - Epoch: 67/500, Iter: 13/119 -- train_loss: 1.4035 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:44:38,305 - INFO - Epoch: 68/500, Iter: 1/119 -- train_loss: 1.3317 


[1/119]   1%|           [00:00<?]

2025-08-11 21:44:38,522 - INFO - Epoch: 68/500, Iter: 2/119 -- train_loss: 1.3128 
2025-08-11 21:44:39,915 - INFO - Epoch: 68/500, Iter: 3/119 -- train_loss: 1.4609 
2025-08-11 21:44:40,154 - INFO - Epoch: 68/500, Iter: 4/119 -- train_loss: 1.3382 
2025-08-11 21:44:40,388 - INFO - Epoch: 68/500, Iter: 5/119 -- train_loss: 1.3377 
2025-08-11 21:44:40,638 - INFO - Epoch: 68/500, Iter: 6/119 -- train_loss: 1.4291 
2025-08-11 21:44:40,880 - INFO - Epoch: 68/500, Iter: 7/119 -- train_loss: 1.2595 
2025-08-11 21:44:41,122 - INFO - Epoch: 68/500, Iter: 8/119 -- train_loss: 1.2737 
2025-08-11 21:44:42,038 - INFO - Epoch: 68/500, Iter: 9/119 -- train_loss: 1.4046 
2025-08-11 21:44:42,888 - INFO - Epoch: 68/500, Iter: 10/119 -- train_loss: 1.4066 
2025-08-11 21:44:43,115 - INFO - Epoch: 68/500, Iter: 11/119 -- train_loss: 1.3770 
2025-08-11 21:44:43,370 - INFO - Epoch: 68/500, Iter: 12/119 -- train_loss: 1.2737 
2025-08-11 21:44:43,621 - INFO - Epoch: 68/500, Iter: 13/119 -- train_loss: 1.3308 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:47:22,297 - INFO - Epoch: 69/500, Iter: 1/119 -- train_loss: 1.3306 


[1/119]   1%|           [00:00<?]

2025-08-11 21:47:22,765 - INFO - Epoch: 69/500, Iter: 2/119 -- train_loss: 1.3659 
2025-08-11 21:47:23,001 - INFO - Epoch: 69/500, Iter: 3/119 -- train_loss: 1.3517 
2025-08-11 21:47:23,257 - INFO - Epoch: 69/500, Iter: 4/119 -- train_loss: 1.2772 
2025-08-11 21:47:23,512 - INFO - Epoch: 69/500, Iter: 5/119 -- train_loss: 1.3724 
2025-08-11 21:47:25,865 - INFO - Epoch: 69/500, Iter: 6/119 -- train_loss: 1.3657 
2025-08-11 21:47:26,141 - INFO - Epoch: 69/500, Iter: 7/119 -- train_loss: 1.4119 
2025-08-11 21:47:26,381 - INFO - Epoch: 69/500, Iter: 8/119 -- train_loss: 1.3499 
2025-08-11 21:47:26,614 - INFO - Epoch: 69/500, Iter: 9/119 -- train_loss: 1.2949 
2025-08-11 21:47:26,864 - INFO - Epoch: 69/500, Iter: 10/119 -- train_loss: 1.3424 
2025-08-11 21:47:27,128 - INFO - Epoch: 69/500, Iter: 11/119 -- train_loss: 1.2716 
2025-08-11 21:47:27,773 - INFO - Epoch: 69/500, Iter: 12/119 -- train_loss: 1.5019 
2025-08-11 21:47:28,014 - INFO - Epoch: 69/500, Iter: 13/119 -- train_loss: 1.3673 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:50:20,390 - INFO - Epoch: 70/500, Iter: 1/119 -- train_loss: 1.3562 


[1/119]   1%|           [00:00<?]

2025-08-11 21:50:20,659 - INFO - Epoch: 70/500, Iter: 2/119 -- train_loss: 1.3329 
2025-08-11 21:50:21,600 - INFO - Epoch: 70/500, Iter: 3/119 -- train_loss: 1.3440 
2025-08-11 21:50:21,876 - INFO - Epoch: 70/500, Iter: 4/119 -- train_loss: 1.2927 
2025-08-11 21:50:22,116 - INFO - Epoch: 70/500, Iter: 5/119 -- train_loss: 1.3371 
2025-08-11 21:50:22,372 - INFO - Epoch: 70/500, Iter: 6/119 -- train_loss: 1.2374 
2025-08-11 21:50:22,589 - INFO - Epoch: 70/500, Iter: 7/119 -- train_loss: 1.3174 
2025-08-11 21:50:22,839 - INFO - Epoch: 70/500, Iter: 8/119 -- train_loss: 1.3492 
2025-08-11 21:50:23,127 - INFO - Epoch: 70/500, Iter: 9/119 -- train_loss: 1.3663 
2025-08-11 21:50:23,956 - INFO - Epoch: 70/500, Iter: 10/119 -- train_loss: 1.3353 
2025-08-11 21:50:26,932 - INFO - Epoch: 70/500, Iter: 11/119 -- train_loss: 1.4024 
2025-08-11 21:50:27,195 - INFO - Epoch: 70/500, Iter: 12/119 -- train_loss: 1.3708 
2025-08-11 21:50:27,482 - INFO - Epoch: 70/500, Iter: 13/119 -- train_loss: 1.3313 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:53:28,635 - INFO - Epoch: 71/500, Iter: 1/119 -- train_loss: 1.3854 


[1/119]   1%|           [00:00<?]

2025-08-11 21:53:28,906 - INFO - Epoch: 71/500, Iter: 2/119 -- train_loss: 1.2331 
2025-08-11 21:53:29,114 - INFO - Epoch: 71/500, Iter: 3/119 -- train_loss: 1.3675 
2025-08-11 21:53:29,338 - INFO - Epoch: 71/500, Iter: 4/119 -- train_loss: 1.2165 
2025-08-11 21:53:29,560 - INFO - Epoch: 71/500, Iter: 5/119 -- train_loss: 1.2820 
2025-08-11 21:53:29,815 - INFO - Epoch: 71/500, Iter: 6/119 -- train_loss: 1.3747 
2025-08-11 21:53:30,064 - INFO - Epoch: 71/500, Iter: 7/119 -- train_loss: 1.3845 
2025-08-11 21:53:30,314 - INFO - Epoch: 71/500, Iter: 8/119 -- train_loss: 1.4060 
2025-08-11 21:53:31,124 - INFO - Epoch: 71/500, Iter: 9/119 -- train_loss: 1.3345 
2025-08-11 21:53:31,374 - INFO - Epoch: 71/500, Iter: 10/119 -- train_loss: 1.3840 
2025-08-11 21:53:31,598 - INFO - Epoch: 71/500, Iter: 11/119 -- train_loss: 1.3371 
2025-08-11 21:53:31,814 - INFO - Epoch: 71/500, Iter: 12/119 -- train_loss: 1.2326 
2025-08-11 21:53:32,088 - INFO - Epoch: 71/500, Iter: 13/119 -- train_loss: 1.3362 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:56:16,274 - INFO - Epoch: 72/500, Iter: 1/119 -- train_loss: 1.2773 


[1/119]   1%|           [00:00<?]

2025-08-11 21:56:17,676 - INFO - Epoch: 72/500, Iter: 2/119 -- train_loss: 1.3456 
2025-08-11 21:56:17,903 - INFO - Epoch: 72/500, Iter: 3/119 -- train_loss: 1.3452 
2025-08-11 21:56:19,681 - INFO - Epoch: 72/500, Iter: 4/119 -- train_loss: 1.2960 
2025-08-11 21:56:19,903 - INFO - Epoch: 72/500, Iter: 5/119 -- train_loss: 1.4005 
2025-08-11 21:56:20,127 - INFO - Epoch: 72/500, Iter: 6/119 -- train_loss: 1.3123 
2025-08-11 21:56:20,375 - INFO - Epoch: 72/500, Iter: 7/119 -- train_loss: 1.2391 
2025-08-11 21:56:20,606 - INFO - Epoch: 72/500, Iter: 8/119 -- train_loss: 1.6025 
2025-08-11 21:56:23,610 - INFO - Epoch: 72/500, Iter: 9/119 -- train_loss: 1.3668 
2025-08-11 21:56:23,830 - INFO - Epoch: 72/500, Iter: 10/119 -- train_loss: 1.4448 
2025-08-11 21:56:24,096 - INFO - Epoch: 72/500, Iter: 11/119 -- train_loss: 1.2758 
2025-08-11 21:56:24,562 - INFO - Epoch: 72/500, Iter: 12/119 -- train_loss: 1.3928 
2025-08-11 21:56:24,809 - INFO - Epoch: 72/500, Iter: 13/119 -- train_loss: 1.3423 


[1/20]   5%|5          [00:00<?]

2025-08-11 21:59:12,020 - INFO - Epoch: 73/500, Iter: 1/119 -- train_loss: 1.6351 


[1/119]   1%|           [00:00<?]

2025-08-11 21:59:12,267 - INFO - Epoch: 73/500, Iter: 2/119 -- train_loss: 1.3398 
2025-08-11 21:59:12,502 - INFO - Epoch: 73/500, Iter: 3/119 -- train_loss: 1.2204 
2025-08-11 21:59:12,756 - INFO - Epoch: 73/500, Iter: 4/119 -- train_loss: 1.2843 
2025-08-11 21:59:12,970 - INFO - Epoch: 73/500, Iter: 5/119 -- train_loss: 1.3172 
2025-08-11 21:59:13,232 - INFO - Epoch: 73/500, Iter: 6/119 -- train_loss: 1.3853 
2025-08-11 21:59:13,480 - INFO - Epoch: 73/500, Iter: 7/119 -- train_loss: 1.3525 
2025-08-11 21:59:13,724 - INFO - Epoch: 73/500, Iter: 8/119 -- train_loss: 1.3465 
2025-08-11 21:59:14,226 - INFO - Epoch: 73/500, Iter: 9/119 -- train_loss: 1.3859 
2025-08-11 21:59:14,466 - INFO - Epoch: 73/500, Iter: 10/119 -- train_loss: 1.4034 
2025-08-11 21:59:14,708 - INFO - Epoch: 73/500, Iter: 11/119 -- train_loss: 1.3759 
2025-08-11 21:59:14,982 - INFO - Epoch: 73/500, Iter: 12/119 -- train_loss: 1.2950 
2025-08-11 21:59:15,216 - INFO - Epoch: 73/500, Iter: 13/119 -- train_loss: 1.3296 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:02:00,609 - INFO - Epoch: 74/500, Iter: 1/119 -- train_loss: 1.3682 


[1/119]   1%|           [00:00<?]

2025-08-11 22:02:01,818 - INFO - Epoch: 74/500, Iter: 2/119 -- train_loss: 1.4322 
2025-08-11 22:02:02,287 - INFO - Epoch: 74/500, Iter: 3/119 -- train_loss: 1.7024 
2025-08-11 22:02:02,535 - INFO - Epoch: 74/500, Iter: 4/119 -- train_loss: 1.2345 
2025-08-11 22:02:02,775 - INFO - Epoch: 74/500, Iter: 5/119 -- train_loss: 1.4267 
2025-08-11 22:02:03,011 - INFO - Epoch: 74/500, Iter: 6/119 -- train_loss: 1.4108 
2025-08-11 22:02:03,257 - INFO - Epoch: 74/500, Iter: 7/119 -- train_loss: 1.3574 
2025-08-11 22:02:03,521 - INFO - Epoch: 74/500, Iter: 8/119 -- train_loss: 1.4770 
2025-08-11 22:02:07,108 - INFO - Epoch: 74/500, Iter: 9/119 -- train_loss: 1.3691 
2025-08-11 22:02:07,347 - INFO - Epoch: 74/500, Iter: 10/119 -- train_loss: 1.2863 
2025-08-11 22:02:07,564 - INFO - Epoch: 74/500, Iter: 11/119 -- train_loss: 1.3809 
2025-08-11 22:02:07,802 - INFO - Epoch: 74/500, Iter: 12/119 -- train_loss: 1.3344 
2025-08-11 22:02:08,034 - INFO - Epoch: 74/500, Iter: 13/119 -- train_loss: 1.3209 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:05:01,184 - INFO - Epoch: 75/500, Iter: 1/119 -- train_loss: 1.3226 


[1/119]   1%|           [00:00<?]

2025-08-11 22:05:01,399 - INFO - Epoch: 75/500, Iter: 2/119 -- train_loss: 1.4539 
2025-08-11 22:05:01,634 - INFO - Epoch: 75/500, Iter: 3/119 -- train_loss: 1.4238 
2025-08-11 22:05:01,878 - INFO - Epoch: 75/500, Iter: 4/119 -- train_loss: 1.4264 
2025-08-11 22:05:02,132 - INFO - Epoch: 75/500, Iter: 5/119 -- train_loss: 1.3713 
2025-08-11 22:05:02,364 - INFO - Epoch: 75/500, Iter: 6/119 -- train_loss: 1.4456 
2025-08-11 22:05:02,603 - INFO - Epoch: 75/500, Iter: 7/119 -- train_loss: 1.4039 
2025-08-11 22:05:02,838 - INFO - Epoch: 75/500, Iter: 8/119 -- train_loss: 1.3242 
2025-08-11 22:05:04,483 - INFO - Epoch: 75/500, Iter: 9/119 -- train_loss: 1.3066 
2025-08-11 22:05:04,720 - INFO - Epoch: 75/500, Iter: 10/119 -- train_loss: 1.3448 
2025-08-11 22:05:04,949 - INFO - Epoch: 75/500, Iter: 11/119 -- train_loss: 1.2904 
2025-08-11 22:05:05,177 - INFO - Epoch: 75/500, Iter: 12/119 -- train_loss: 1.3329 
2025-08-11 22:05:05,419 - INFO - Epoch: 75/500, Iter: 13/119 -- train_loss: 1.3801 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:07:49,142 - INFO - Epoch: 76/500, Iter: 1/119 -- train_loss: 1.3095 


[1/119]   1%|           [00:00<?]

2025-08-11 22:07:53,462 - INFO - Epoch: 76/500, Iter: 2/119 -- train_loss: 1.4022 
2025-08-11 22:07:53,702 - INFO - Epoch: 76/500, Iter: 3/119 -- train_loss: 1.5296 
2025-08-11 22:07:53,950 - INFO - Epoch: 76/500, Iter: 4/119 -- train_loss: 1.2796 
2025-08-11 22:07:54,191 - INFO - Epoch: 76/500, Iter: 5/119 -- train_loss: 1.2902 
2025-08-11 22:07:54,451 - INFO - Epoch: 76/500, Iter: 6/119 -- train_loss: 1.3604 
2025-08-11 22:07:54,666 - INFO - Epoch: 76/500, Iter: 7/119 -- train_loss: 1.4301 
2025-08-11 22:07:54,913 - INFO - Epoch: 76/500, Iter: 8/119 -- train_loss: 1.4120 
2025-08-11 22:07:55,167 - INFO - Epoch: 76/500, Iter: 9/119 -- train_loss: 1.4311 
2025-08-11 22:07:55,697 - INFO - Epoch: 76/500, Iter: 10/119 -- train_loss: 1.3836 
2025-08-11 22:07:55,937 - INFO - Epoch: 76/500, Iter: 11/119 -- train_loss: 1.3706 
2025-08-11 22:07:56,170 - INFO - Epoch: 76/500, Iter: 12/119 -- train_loss: 1.4122 
2025-08-11 22:07:56,423 - INFO - Epoch: 76/500, Iter: 13/119 -- train_loss: 1.2779 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:10:45,852 - INFO - Epoch: 77/500, Iter: 1/119 -- train_loss: 1.3096 


[1/119]   1%|           [00:00<?]

2025-08-11 22:10:46,106 - INFO - Epoch: 77/500, Iter: 2/119 -- train_loss: 1.3673 
2025-08-11 22:10:46,337 - INFO - Epoch: 77/500, Iter: 3/119 -- train_loss: 1.3243 
2025-08-11 22:10:46,594 - INFO - Epoch: 77/500, Iter: 4/119 -- train_loss: 1.2561 
2025-08-11 22:10:46,852 - INFO - Epoch: 77/500, Iter: 5/119 -- train_loss: 1.3784 
2025-08-11 22:10:47,097 - INFO - Epoch: 77/500, Iter: 6/119 -- train_loss: 1.4542 
2025-08-11 22:10:47,369 - INFO - Epoch: 77/500, Iter: 7/119 -- train_loss: 1.4374 
2025-08-11 22:10:47,618 - INFO - Epoch: 77/500, Iter: 8/119 -- train_loss: 1.3537 
2025-08-11 22:10:48,452 - INFO - Epoch: 77/500, Iter: 9/119 -- train_loss: 1.3436 
2025-08-11 22:10:48,695 - INFO - Epoch: 77/500, Iter: 10/119 -- train_loss: 1.2856 
2025-08-11 22:10:48,969 - INFO - Epoch: 77/500, Iter: 11/119 -- train_loss: 1.3909 
2025-08-11 22:10:49,218 - INFO - Epoch: 77/500, Iter: 12/119 -- train_loss: 1.3773 
2025-08-11 22:10:49,595 - INFO - Epoch: 77/500, Iter: 13/119 -- train_loss: 1.3935 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:13:37,227 - INFO - Epoch: 78/500, Iter: 1/119 -- train_loss: 1.4509 


[1/119]   1%|           [00:00<?]

2025-08-11 22:13:37,802 - INFO - Epoch: 78/500, Iter: 2/119 -- train_loss: 1.3296 
2025-08-11 22:13:38,061 - INFO - Epoch: 78/500, Iter: 3/119 -- train_loss: 1.4940 
2025-08-11 22:13:38,314 - INFO - Epoch: 78/500, Iter: 4/119 -- train_loss: 1.3663 
2025-08-11 22:13:38,561 - INFO - Epoch: 78/500, Iter: 5/119 -- train_loss: 1.3029 
2025-08-11 22:13:38,800 - INFO - Epoch: 78/500, Iter: 6/119 -- train_loss: 1.4311 
2025-08-11 22:13:39,071 - INFO - Epoch: 78/500, Iter: 7/119 -- train_loss: 1.3417 
2025-08-11 22:13:39,318 - INFO - Epoch: 78/500, Iter: 8/119 -- train_loss: 1.3364 
2025-08-11 22:13:42,922 - INFO - Epoch: 78/500, Iter: 9/119 -- train_loss: 1.4601 
2025-08-11 22:13:45,428 - INFO - Epoch: 78/500, Iter: 10/119 -- train_loss: 1.3054 
2025-08-11 22:13:45,669 - INFO - Epoch: 78/500, Iter: 11/119 -- train_loss: 1.3282 
2025-08-11 22:13:45,893 - INFO - Epoch: 78/500, Iter: 12/119 -- train_loss: 1.4234 
2025-08-11 22:13:46,112 - INFO - Epoch: 78/500, Iter: 13/119 -- train_loss: 1.3333 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:16:33,587 - INFO - Epoch: 79/500, Iter: 1/119 -- train_loss: 1.6996 


[1/119]   1%|           [00:00<?]

2025-08-11 22:16:33,837 - INFO - Epoch: 79/500, Iter: 2/119 -- train_loss: 1.3791 
2025-08-11 22:16:34,108 - INFO - Epoch: 79/500, Iter: 3/119 -- train_loss: 1.2732 
2025-08-11 22:16:34,584 - INFO - Epoch: 79/500, Iter: 4/119 -- train_loss: 1.3386 
2025-08-11 22:16:34,861 - INFO - Epoch: 79/500, Iter: 5/119 -- train_loss: 1.3262 
2025-08-11 22:16:35,240 - INFO - Epoch: 79/500, Iter: 6/119 -- train_loss: 1.4296 
2025-08-11 22:16:35,482 - INFO - Epoch: 79/500, Iter: 7/119 -- train_loss: 1.3788 
2025-08-11 22:16:35,750 - INFO - Epoch: 79/500, Iter: 8/119 -- train_loss: 1.3836 
2025-08-11 22:16:37,036 - INFO - Epoch: 79/500, Iter: 9/119 -- train_loss: 1.3423 
2025-08-11 22:16:37,279 - INFO - Epoch: 79/500, Iter: 10/119 -- train_loss: 1.3043 
2025-08-11 22:16:37,549 - INFO - Epoch: 79/500, Iter: 11/119 -- train_loss: 1.3854 
2025-08-11 22:16:39,304 - INFO - Epoch: 79/500, Iter: 12/119 -- train_loss: 1.4743 
2025-08-11 22:16:39,559 - INFO - Epoch: 79/500, Iter: 13/119 -- train_loss: 1.4404 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:19:39,203 - INFO - Epoch: 80/500, Iter: 1/119 -- train_loss: 1.3923 


[1/119]   1%|           [00:00<?]

2025-08-11 22:19:40,036 - INFO - Epoch: 80/500, Iter: 2/119 -- train_loss: 1.3679 
2025-08-11 22:19:40,656 - INFO - Epoch: 80/500, Iter: 3/119 -- train_loss: 1.3401 
2025-08-11 22:19:41,166 - INFO - Epoch: 80/500, Iter: 4/119 -- train_loss: 1.3418 
2025-08-11 22:19:41,669 - INFO - Epoch: 80/500, Iter: 5/119 -- train_loss: 1.4635 
2025-08-11 22:19:42,207 - INFO - Epoch: 80/500, Iter: 6/119 -- train_loss: 1.3711 
2025-08-11 22:19:42,726 - INFO - Epoch: 80/500, Iter: 7/119 -- train_loss: 1.3227 
2025-08-11 22:19:43,225 - INFO - Epoch: 80/500, Iter: 8/119 -- train_loss: 1.3964 
2025-08-11 22:19:47,779 - INFO - Epoch: 80/500, Iter: 9/119 -- train_loss: 1.3239 
2025-08-11 22:19:49,501 - INFO - Epoch: 80/500, Iter: 10/119 -- train_loss: 1.4167 
2025-08-11 22:19:50,261 - INFO - Epoch: 80/500, Iter: 11/119 -- train_loss: 1.3470 
2025-08-11 22:19:50,746 - INFO - Epoch: 80/500, Iter: 12/119 -- train_loss: 1.3248 
2025-08-11 22:19:51,021 - INFO - Epoch: 80/500, Iter: 13/119 -- train_loss: 1.3799 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:22:55,231 - INFO - Epoch: 81/500, Iter: 1/119 -- train_loss: 1.3621 


[1/119]   1%|           [00:00<?]

2025-08-11 22:22:55,518 - INFO - Epoch: 81/500, Iter: 2/119 -- train_loss: 1.3529 
2025-08-11 22:22:55,784 - INFO - Epoch: 81/500, Iter: 3/119 -- train_loss: 1.2642 
2025-08-11 22:22:56,063 - INFO - Epoch: 81/500, Iter: 4/119 -- train_loss: 1.4083 
2025-08-11 22:22:56,851 - INFO - Epoch: 81/500, Iter: 5/119 -- train_loss: 1.3393 
2025-08-11 22:22:57,091 - INFO - Epoch: 81/500, Iter: 6/119 -- train_loss: 1.2954 
2025-08-11 22:22:57,331 - INFO - Epoch: 81/500, Iter: 7/119 -- train_loss: 1.2681 
2025-08-11 22:22:57,627 - INFO - Epoch: 81/500, Iter: 8/119 -- train_loss: 1.3122 
2025-08-11 22:22:57,927 - INFO - Epoch: 81/500, Iter: 9/119 -- train_loss: 1.3425 
2025-08-11 22:22:58,392 - INFO - Epoch: 81/500, Iter: 10/119 -- train_loss: 1.3213 
2025-08-11 22:22:58,653 - INFO - Epoch: 81/500, Iter: 11/119 -- train_loss: 1.3116 
2025-08-11 22:23:00,982 - INFO - Epoch: 81/500, Iter: 12/119 -- train_loss: 1.4404 
2025-08-11 22:23:04,254 - INFO - Epoch: 81/500, Iter: 13/119 -- train_loss: 1.4534 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:26:15,641 - INFO - Epoch: 82/500, Iter: 1/119 -- train_loss: 1.3517 


[1/119]   1%|           [00:00<?]

2025-08-11 22:26:15,955 - INFO - Epoch: 82/500, Iter: 2/119 -- train_loss: 1.2828 
2025-08-11 22:26:16,203 - INFO - Epoch: 82/500, Iter: 3/119 -- train_loss: 1.4488 
2025-08-11 22:26:16,718 - INFO - Epoch: 82/500, Iter: 4/119 -- train_loss: 1.3271 
2025-08-11 22:26:16,981 - INFO - Epoch: 82/500, Iter: 5/119 -- train_loss: 1.4061 
2025-08-11 22:26:17,279 - INFO - Epoch: 82/500, Iter: 6/119 -- train_loss: 1.3905 
2025-08-11 22:26:17,596 - INFO - Epoch: 82/500, Iter: 7/119 -- train_loss: 1.3731 
2025-08-11 22:26:17,891 - INFO - Epoch: 82/500, Iter: 8/119 -- train_loss: 1.3149 
2025-08-11 22:26:20,729 - INFO - Epoch: 82/500, Iter: 9/119 -- train_loss: 1.3039 
2025-08-11 22:26:21,032 - INFO - Epoch: 82/500, Iter: 10/119 -- train_loss: 1.3354 
2025-08-11 22:26:21,345 - INFO - Epoch: 82/500, Iter: 11/119 -- train_loss: 1.3713 
2025-08-11 22:26:21,622 - INFO - Epoch: 82/500, Iter: 12/119 -- train_loss: 1.4294 
2025-08-11 22:26:21,900 - INFO - Epoch: 82/500, Iter: 13/119 -- train_loss: 1.3631 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:29:45,587 - INFO - Epoch: 83/500, Iter: 1/119 -- train_loss: 1.3397 


[1/119]   1%|           [00:00<?]

2025-08-11 22:29:49,682 - INFO - Epoch: 83/500, Iter: 2/119 -- train_loss: 1.3400 
2025-08-11 22:29:50,154 - INFO - Epoch: 83/500, Iter: 3/119 -- train_loss: 1.3011 
2025-08-11 22:29:50,696 - INFO - Epoch: 83/500, Iter: 4/119 -- train_loss: 1.3492 
2025-08-11 22:29:51,242 - INFO - Epoch: 83/500, Iter: 5/119 -- train_loss: 1.2858 
2025-08-11 22:29:51,778 - INFO - Epoch: 83/500, Iter: 6/119 -- train_loss: 1.2965 
2025-08-11 22:29:52,303 - INFO - Epoch: 83/500, Iter: 7/119 -- train_loss: 1.3151 
2025-08-11 22:29:52,930 - INFO - Epoch: 83/500, Iter: 8/119 -- train_loss: 1.3142 
2025-08-11 22:29:53,537 - INFO - Epoch: 83/500, Iter: 9/119 -- train_loss: 1.4007 
2025-08-11 22:30:01,410 - INFO - Epoch: 83/500, Iter: 10/119 -- train_loss: 1.4644 
2025-08-11 22:30:01,935 - INFO - Epoch: 83/500, Iter: 11/119 -- train_loss: 1.3374 
2025-08-11 22:30:02,481 - INFO - Epoch: 83/500, Iter: 12/119 -- train_loss: 1.3269 
2025-08-11 22:30:02,966 - INFO - Epoch: 83/500, Iter: 13/119 -- train_loss: 1.3404 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:33:48,787 - INFO - Epoch: 84/500, Iter: 1/119 -- train_loss: 1.4328 


[1/119]   1%|           [00:00<?]

2025-08-11 22:33:49,436 - INFO - Epoch: 84/500, Iter: 2/119 -- train_loss: 1.2752 
2025-08-11 22:33:50,263 - INFO - Epoch: 84/500, Iter: 3/119 -- train_loss: 1.2810 
2025-08-11 22:33:50,725 - INFO - Epoch: 84/500, Iter: 4/119 -- train_loss: 1.3579 
2025-08-11 22:33:51,197 - INFO - Epoch: 84/500, Iter: 5/119 -- train_loss: 1.2642 
2025-08-11 22:33:51,729 - INFO - Epoch: 84/500, Iter: 6/119 -- train_loss: 1.3864 
2025-08-11 22:33:54,840 - INFO - Epoch: 84/500, Iter: 7/119 -- train_loss: 1.4015 
2025-08-11 22:33:55,260 - INFO - Epoch: 84/500, Iter: 8/119 -- train_loss: 1.3544 
2025-08-11 22:33:57,602 - INFO - Epoch: 84/500, Iter: 9/119 -- train_loss: 1.2751 
2025-08-11 22:33:59,523 - INFO - Epoch: 84/500, Iter: 10/119 -- train_loss: 1.4069 
2025-08-11 22:34:04,295 - INFO - Epoch: 84/500, Iter: 11/119 -- train_loss: 1.3656 
2025-08-11 22:34:04,688 - INFO - Epoch: 84/500, Iter: 12/119 -- train_loss: 1.4747 
2025-08-11 22:34:05,116 - INFO - Epoch: 84/500, Iter: 13/119 -- train_loss: 1.3593 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:37:50,029 - INFO - Epoch: 85/500, Iter: 1/119 -- train_loss: 1.3825 


[1/119]   1%|           [00:00<?]

2025-08-11 22:37:50,902 - INFO - Epoch: 85/500, Iter: 2/119 -- train_loss: 1.3846 
2025-08-11 22:37:54,117 - INFO - Epoch: 85/500, Iter: 3/119 -- train_loss: 1.3076 
2025-08-11 22:37:54,586 - INFO - Epoch: 85/500, Iter: 4/119 -- train_loss: 1.3985 
2025-08-11 22:37:58,409 - INFO - Epoch: 85/500, Iter: 5/119 -- train_loss: 1.4472 
2025-08-11 22:37:58,829 - INFO - Epoch: 85/500, Iter: 6/119 -- train_loss: 1.3769 
2025-08-11 22:37:59,296 - INFO - Epoch: 85/500, Iter: 7/119 -- train_loss: 1.2994 
2025-08-11 22:37:59,789 - INFO - Epoch: 85/500, Iter: 8/119 -- train_loss: 1.3293 
2025-08-11 22:38:00,322 - INFO - Epoch: 85/500, Iter: 9/119 -- train_loss: 1.3896 
2025-08-11 22:38:06,286 - INFO - Epoch: 85/500, Iter: 10/119 -- train_loss: 1.3937 
2025-08-11 22:38:06,666 - INFO - Epoch: 85/500, Iter: 11/119 -- train_loss: 1.2729 
2025-08-11 22:38:07,381 - INFO - Epoch: 85/500, Iter: 12/119 -- train_loss: 1.3774 
2025-08-11 22:38:07,887 - INFO - Epoch: 85/500, Iter: 13/119 -- train_loss: 1.4246 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:42:19,251 - INFO - Epoch: 86/500, Iter: 1/119 -- train_loss: 1.3264 


[1/119]   1%|           [00:00<?]

2025-08-11 22:42:20,266 - INFO - Epoch: 86/500, Iter: 2/119 -- train_loss: 1.2802 
2025-08-11 22:42:20,912 - INFO - Epoch: 86/500, Iter: 3/119 -- train_loss: 1.2565 
2025-08-11 22:42:21,472 - INFO - Epoch: 86/500, Iter: 4/119 -- train_loss: 1.2733 
2025-08-11 22:42:21,765 - INFO - Epoch: 86/500, Iter: 5/119 -- train_loss: 1.3502 
2025-08-11 22:42:22,071 - INFO - Epoch: 86/500, Iter: 6/119 -- train_loss: 1.3740 
2025-08-11 22:42:22,425 - INFO - Epoch: 86/500, Iter: 7/119 -- train_loss: 1.3615 
2025-08-11 22:42:23,996 - INFO - Epoch: 86/500, Iter: 8/119 -- train_loss: 1.4082 
2025-08-11 22:42:24,962 - INFO - Epoch: 86/500, Iter: 9/119 -- train_loss: 1.3400 
2025-08-11 22:42:25,272 - INFO - Epoch: 86/500, Iter: 10/119 -- train_loss: 1.2748 
2025-08-11 22:42:25,563 - INFO - Epoch: 86/500, Iter: 11/119 -- train_loss: 1.3225 
2025-08-11 22:42:28,433 - INFO - Epoch: 86/500, Iter: 12/119 -- train_loss: 1.4361 
2025-08-11 22:42:28,767 - INFO - Epoch: 86/500, Iter: 13/119 -- train_loss: 1.3294 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:46:01,164 - INFO - Epoch: 87/500, Iter: 1/119 -- train_loss: 1.3351 


[1/119]   1%|           [00:00<?]

2025-08-11 22:46:01,648 - INFO - Epoch: 87/500, Iter: 2/119 -- train_loss: 1.4279 
2025-08-11 22:46:02,114 - INFO - Epoch: 87/500, Iter: 3/119 -- train_loss: 1.3437 
2025-08-11 22:46:02,687 - INFO - Epoch: 87/500, Iter: 4/119 -- train_loss: 1.3235 
2025-08-11 22:46:03,206 - INFO - Epoch: 87/500, Iter: 5/119 -- train_loss: 1.4065 
2025-08-11 22:46:03,638 - INFO - Epoch: 87/500, Iter: 6/119 -- train_loss: 1.3605 
2025-08-11 22:46:05,564 - INFO - Epoch: 87/500, Iter: 7/119 -- train_loss: 1.3391 
2025-08-11 22:46:06,201 - INFO - Epoch: 87/500, Iter: 8/119 -- train_loss: 1.4542 
2025-08-11 22:46:13,443 - INFO - Epoch: 87/500, Iter: 9/119 -- train_loss: 1.3488 
2025-08-11 22:46:14,607 - INFO - Epoch: 87/500, Iter: 10/119 -- train_loss: 1.3125 
2025-08-11 22:46:15,324 - INFO - Epoch: 87/500, Iter: 11/119 -- train_loss: 1.3403 
2025-08-11 22:46:15,773 - INFO - Epoch: 87/500, Iter: 12/119 -- train_loss: 1.3583 
2025-08-11 22:46:16,204 - INFO - Epoch: 87/500, Iter: 13/119 -- train_loss: 1.3182 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:49:37,162 - INFO - Epoch: 88/500, Iter: 1/119 -- train_loss: 1.3383 


[1/119]   1%|           [00:00<?]

2025-08-11 22:49:37,448 - INFO - Epoch: 88/500, Iter: 2/119 -- train_loss: 1.3491 
2025-08-11 22:49:37,747 - INFO - Epoch: 88/500, Iter: 3/119 -- train_loss: 1.4516 
2025-08-11 22:49:38,021 - INFO - Epoch: 88/500, Iter: 4/119 -- train_loss: 1.4265 
2025-08-11 22:49:38,284 - INFO - Epoch: 88/500, Iter: 5/119 -- train_loss: 1.4134 
2025-08-11 22:49:38,561 - INFO - Epoch: 88/500, Iter: 6/119 -- train_loss: 1.3682 
2025-08-11 22:49:38,820 - INFO - Epoch: 88/500, Iter: 7/119 -- train_loss: 1.3288 
2025-08-11 22:49:39,114 - INFO - Epoch: 88/500, Iter: 8/119 -- train_loss: 1.3853 
2025-08-11 22:49:42,501 - INFO - Epoch: 88/500, Iter: 9/119 -- train_loss: 1.3187 
2025-08-11 22:49:42,769 - INFO - Epoch: 88/500, Iter: 10/119 -- train_loss: 1.3200 
2025-08-11 22:49:43,054 - INFO - Epoch: 88/500, Iter: 11/119 -- train_loss: 1.2377 
2025-08-11 22:49:43,318 - INFO - Epoch: 88/500, Iter: 12/119 -- train_loss: 1.3272 
2025-08-11 22:49:43,581 - INFO - Epoch: 88/500, Iter: 13/119 -- train_loss: 1.2691 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:53:07,173 - INFO - Epoch: 89/500, Iter: 1/119 -- train_loss: 1.3685 


[1/119]   1%|           [00:00<?]

2025-08-11 22:53:10,632 - INFO - Epoch: 89/500, Iter: 2/119 -- train_loss: 1.4106 
2025-08-11 22:53:10,934 - INFO - Epoch: 89/500, Iter: 3/119 -- train_loss: 1.3466 
2025-08-11 22:53:11,209 - INFO - Epoch: 89/500, Iter: 4/119 -- train_loss: 1.3006 
2025-08-11 22:53:11,485 - INFO - Epoch: 89/500, Iter: 5/119 -- train_loss: 1.3454 
2025-08-11 22:53:11,750 - INFO - Epoch: 89/500, Iter: 6/119 -- train_loss: 1.2802 
2025-08-11 22:53:12,023 - INFO - Epoch: 89/500, Iter: 7/119 -- train_loss: 1.4252 
2025-08-11 22:53:12,310 - INFO - Epoch: 89/500, Iter: 8/119 -- train_loss: 1.3106 
2025-08-11 22:53:12,615 - INFO - Epoch: 89/500, Iter: 9/119 -- train_loss: 1.3572 
2025-08-11 22:53:13,645 - INFO - Epoch: 89/500, Iter: 10/119 -- train_loss: 1.3354 
2025-08-11 22:53:13,960 - INFO - Epoch: 89/500, Iter: 11/119 -- train_loss: 1.4477 
2025-08-11 22:53:14,220 - INFO - Epoch: 89/500, Iter: 12/119 -- train_loss: 1.5443 
2025-08-11 22:53:14,600 - INFO - Epoch: 89/500, Iter: 13/119 -- train_loss: 1.3551 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:56:28,822 - INFO - Epoch: 90/500, Iter: 1/119 -- train_loss: 1.4090 


[1/119]   1%|           [00:00<?]

2025-08-11 22:56:30,395 - INFO - Epoch: 90/500, Iter: 2/119 -- train_loss: 1.3929 
2025-08-11 22:56:30,648 - INFO - Epoch: 90/500, Iter: 3/119 -- train_loss: 1.2842 
2025-08-11 22:56:30,888 - INFO - Epoch: 90/500, Iter: 4/119 -- train_loss: 1.3853 
2025-08-11 22:56:31,121 - INFO - Epoch: 90/500, Iter: 5/119 -- train_loss: 1.3420 
2025-08-11 22:56:31,358 - INFO - Epoch: 90/500, Iter: 6/119 -- train_loss: 1.4546 
2025-08-11 22:56:31,595 - INFO - Epoch: 90/500, Iter: 7/119 -- train_loss: 1.3463 
2025-08-11 22:56:31,856 - INFO - Epoch: 90/500, Iter: 8/119 -- train_loss: 1.3816 
2025-08-11 22:56:32,849 - INFO - Epoch: 90/500, Iter: 9/119 -- train_loss: 1.4605 
2025-08-11 22:56:33,534 - INFO - Epoch: 90/500, Iter: 10/119 -- train_loss: 1.3291 
2025-08-11 22:56:33,769 - INFO - Epoch: 90/500, Iter: 11/119 -- train_loss: 1.2297 
2025-08-11 22:56:34,013 - INFO - Epoch: 90/500, Iter: 12/119 -- train_loss: 1.4380 
2025-08-11 22:56:34,242 - INFO - Epoch: 90/500, Iter: 13/119 -- train_loss: 1.3313 


[1/20]   5%|5          [00:00<?]

2025-08-11 22:59:20,778 - INFO - Epoch: 91/500, Iter: 1/119 -- train_loss: 1.2759 


[1/119]   1%|           [00:00<?]

2025-08-11 22:59:21,030 - INFO - Epoch: 91/500, Iter: 2/119 -- train_loss: 1.3341 
2025-08-11 22:59:21,258 - INFO - Epoch: 91/500, Iter: 3/119 -- train_loss: 1.3389 
2025-08-11 22:59:21,525 - INFO - Epoch: 91/500, Iter: 4/119 -- train_loss: 1.3268 
2025-08-11 22:59:21,758 - INFO - Epoch: 91/500, Iter: 5/119 -- train_loss: 1.5985 
2025-08-11 22:59:22,035 - INFO - Epoch: 91/500, Iter: 6/119 -- train_loss: 1.4186 
2025-08-11 22:59:22,280 - INFO - Epoch: 91/500, Iter: 7/119 -- train_loss: 1.4058 
2025-08-11 22:59:22,507 - INFO - Epoch: 91/500, Iter: 8/119 -- train_loss: 1.5585 
2025-08-11 22:59:26,281 - INFO - Epoch: 91/500, Iter: 9/119 -- train_loss: 1.3750 
2025-08-11 22:59:26,518 - INFO - Epoch: 91/500, Iter: 10/119 -- train_loss: 1.3932 
2025-08-11 22:59:26,771 - INFO - Epoch: 91/500, Iter: 11/119 -- train_loss: 1.3876 
2025-08-11 22:59:27,002 - INFO - Epoch: 91/500, Iter: 12/119 -- train_loss: 1.3317 
2025-08-11 22:59:27,215 - INFO - Epoch: 91/500, Iter: 13/119 -- train_loss: 1.3955 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:02:08,901 - INFO - Epoch: 92/500, Iter: 1/119 -- train_loss: 1.4129 


[1/119]   1%|           [00:00<?]

2025-08-11 23:02:09,434 - INFO - Epoch: 92/500, Iter: 2/119 -- train_loss: 1.3091 
2025-08-11 23:02:10,014 - INFO - Epoch: 92/500, Iter: 3/119 -- train_loss: 1.3970 
2025-08-11 23:02:10,361 - INFO - Epoch: 92/500, Iter: 4/119 -- train_loss: 1.2533 
2025-08-11 23:02:10,623 - INFO - Epoch: 92/500, Iter: 5/119 -- train_loss: 1.3457 
2025-08-11 23:02:10,867 - INFO - Epoch: 92/500, Iter: 6/119 -- train_loss: 1.2053 
2025-08-11 23:02:11,101 - INFO - Epoch: 92/500, Iter: 7/119 -- train_loss: 1.3326 
2025-08-11 23:02:11,354 - INFO - Epoch: 92/500, Iter: 8/119 -- train_loss: 1.4443 
2025-08-11 23:02:14,645 - INFO - Epoch: 92/500, Iter: 9/119 -- train_loss: 1.4220 
2025-08-11 23:02:14,878 - INFO - Epoch: 92/500, Iter: 10/119 -- train_loss: 1.2967 
2025-08-11 23:02:15,101 - INFO - Epoch: 92/500, Iter: 11/119 -- train_loss: 1.3467 
2025-08-11 23:02:15,367 - INFO - Epoch: 92/500, Iter: 12/119 -- train_loss: 1.3110 
2025-08-11 23:02:15,633 - INFO - Epoch: 92/500, Iter: 13/119 -- train_loss: 1.2791 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:05:02,730 - INFO - Epoch: 93/500, Iter: 1/119 -- train_loss: 1.2677 


[1/119]   1%|           [00:00<?]

2025-08-11 23:05:05,604 - INFO - Epoch: 93/500, Iter: 2/119 -- train_loss: 1.3493 
2025-08-11 23:05:06,097 - INFO - Epoch: 93/500, Iter: 3/119 -- train_loss: 1.3757 
2025-08-11 23:05:06,293 - INFO - Epoch: 93/500, Iter: 4/119 -- train_loss: 1.3677 
2025-08-11 23:05:06,522 - INFO - Epoch: 93/500, Iter: 5/119 -- train_loss: 1.2408 
2025-08-11 23:05:06,739 - INFO - Epoch: 93/500, Iter: 6/119 -- train_loss: 1.3248 
2025-08-11 23:05:06,948 - INFO - Epoch: 93/500, Iter: 7/119 -- train_loss: 1.3846 
2025-08-11 23:05:07,187 - INFO - Epoch: 93/500, Iter: 8/119 -- train_loss: 1.2801 
2025-08-11 23:05:07,403 - INFO - Epoch: 93/500, Iter: 9/119 -- train_loss: 1.3528 
2025-08-11 23:05:07,652 - INFO - Epoch: 93/500, Iter: 10/119 -- train_loss: 1.3010 
2025-08-11 23:05:08,898 - INFO - Epoch: 93/500, Iter: 11/119 -- train_loss: 1.6897 
2025-08-11 23:05:09,181 - INFO - Epoch: 93/500, Iter: 12/119 -- train_loss: 1.3529 
2025-08-11 23:05:09,392 - INFO - Epoch: 93/500, Iter: 13/119 -- train_loss: 1.4016 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:07:47,102 - INFO - Epoch: 94/500, Iter: 1/119 -- train_loss: 1.6610 


[1/119]   1%|           [00:00<?]

2025-08-11 23:07:47,319 - INFO - Epoch: 94/500, Iter: 2/119 -- train_loss: 1.3057 
2025-08-11 23:07:47,525 - INFO - Epoch: 94/500, Iter: 3/119 -- train_loss: 1.4077 
2025-08-11 23:07:47,736 - INFO - Epoch: 94/500, Iter: 4/119 -- train_loss: 1.3096 
2025-08-11 23:07:48,009 - INFO - Epoch: 94/500, Iter: 5/119 -- train_loss: 1.3653 
2025-08-11 23:07:48,240 - INFO - Epoch: 94/500, Iter: 6/119 -- train_loss: 1.2952 
2025-08-11 23:07:48,480 - INFO - Epoch: 94/500, Iter: 7/119 -- train_loss: 1.5079 
2025-08-11 23:07:48,719 - INFO - Epoch: 94/500, Iter: 8/119 -- train_loss: 1.3085 
2025-08-11 23:07:49,883 - INFO - Epoch: 94/500, Iter: 9/119 -- train_loss: 1.3442 
2025-08-11 23:07:50,106 - INFO - Epoch: 94/500, Iter: 10/119 -- train_loss: 1.3908 
2025-08-11 23:07:50,317 - INFO - Epoch: 94/500, Iter: 11/119 -- train_loss: 1.2954 
2025-08-11 23:07:50,552 - INFO - Epoch: 94/500, Iter: 12/119 -- train_loss: 1.4776 
2025-08-11 23:07:50,780 - INFO - Epoch: 94/500, Iter: 13/119 -- train_loss: 1.2844 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:10:34,012 - INFO - Epoch: 95/500, Iter: 1/119 -- train_loss: 1.2767 


[1/119]   1%|           [00:00<?]

2025-08-11 23:10:37,779 - INFO - Epoch: 95/500, Iter: 2/119 -- train_loss: 1.4628 
2025-08-11 23:10:38,038 - INFO - Epoch: 95/500, Iter: 3/119 -- train_loss: 1.3604 
2025-08-11 23:10:38,262 - INFO - Epoch: 95/500, Iter: 4/119 -- train_loss: 1.2358 
2025-08-11 23:10:38,478 - INFO - Epoch: 95/500, Iter: 5/119 -- train_loss: 1.3075 
2025-08-11 23:10:38,710 - INFO - Epoch: 95/500, Iter: 6/119 -- train_loss: 1.3707 
2025-08-11 23:10:38,938 - INFO - Epoch: 95/500, Iter: 7/119 -- train_loss: 1.4234 
2025-08-11 23:10:39,171 - INFO - Epoch: 95/500, Iter: 8/119 -- train_loss: 1.3590 
2025-08-11 23:10:39,445 - INFO - Epoch: 95/500, Iter: 9/119 -- train_loss: 1.4796 
2025-08-11 23:10:39,705 - INFO - Epoch: 95/500, Iter: 10/119 -- train_loss: 1.3500 
2025-08-11 23:10:39,995 - INFO - Epoch: 95/500, Iter: 11/119 -- train_loss: 1.4431 
2025-08-11 23:10:40,838 - INFO - Epoch: 95/500, Iter: 12/119 -- train_loss: 1.4677 
2025-08-11 23:10:41,067 - INFO - Epoch: 95/500, Iter: 13/119 -- train_loss: 1.4492 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:13:19,357 - INFO - Epoch: 96/500, Iter: 1/119 -- train_loss: 1.3079 


[1/119]   1%|           [00:00<?]

2025-08-11 23:13:19,625 - INFO - Epoch: 96/500, Iter: 2/119 -- train_loss: 1.2567 
2025-08-11 23:13:21,815 - INFO - Epoch: 96/500, Iter: 3/119 -- train_loss: 1.4026 
2025-08-11 23:13:22,074 - INFO - Epoch: 96/500, Iter: 4/119 -- train_loss: 1.2915 
2025-08-11 23:13:22,341 - INFO - Epoch: 96/500, Iter: 5/119 -- train_loss: 1.3652 
2025-08-11 23:13:22,574 - INFO - Epoch: 96/500, Iter: 6/119 -- train_loss: 1.3341 
2025-08-11 23:13:22,866 - INFO - Epoch: 96/500, Iter: 7/119 -- train_loss: 1.3389 
2025-08-11 23:13:23,106 - INFO - Epoch: 96/500, Iter: 8/119 -- train_loss: 1.3313 
2025-08-11 23:13:23,359 - INFO - Epoch: 96/500, Iter: 9/119 -- train_loss: 1.2864 
2025-08-11 23:13:23,608 - INFO - Epoch: 96/500, Iter: 10/119 -- train_loss: 1.2422 
2025-08-11 23:13:25,401 - INFO - Epoch: 96/500, Iter: 11/119 -- train_loss: 1.4775 
2025-08-11 23:13:25,637 - INFO - Epoch: 96/500, Iter: 12/119 -- train_loss: 1.3251 
2025-08-11 23:13:25,899 - INFO - Epoch: 96/500, Iter: 13/119 -- train_loss: 1.3704 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:16:10,913 - INFO - Epoch: 97/500, Iter: 1/119 -- train_loss: 1.3693 


[1/119]   1%|           [00:00<?]

2025-08-11 23:16:11,163 - INFO - Epoch: 97/500, Iter: 2/119 -- train_loss: 1.2744 
2025-08-11 23:16:11,419 - INFO - Epoch: 97/500, Iter: 3/119 -- train_loss: 1.3270 
2025-08-11 23:16:11,687 - INFO - Epoch: 97/500, Iter: 4/119 -- train_loss: 1.3813 
2025-08-11 23:16:11,903 - INFO - Epoch: 97/500, Iter: 5/119 -- train_loss: 1.2416 
2025-08-11 23:16:12,125 - INFO - Epoch: 97/500, Iter: 6/119 -- train_loss: 1.4046 
2025-08-11 23:16:12,347 - INFO - Epoch: 97/500, Iter: 7/119 -- train_loss: 1.3614 
2025-08-11 23:16:12,620 - INFO - Epoch: 97/500, Iter: 8/119 -- train_loss: 1.3432 
2025-08-11 23:16:15,013 - INFO - Epoch: 97/500, Iter: 9/119 -- train_loss: 1.4036 
2025-08-11 23:16:15,248 - INFO - Epoch: 97/500, Iter: 10/119 -- train_loss: 1.4372 
2025-08-11 23:16:16,257 - INFO - Epoch: 97/500, Iter: 11/119 -- train_loss: 1.4268 
2025-08-11 23:16:16,765 - INFO - Epoch: 97/500, Iter: 12/119 -- train_loss: 1.3186 
2025-08-11 23:16:16,999 - INFO - Epoch: 97/500, Iter: 13/119 -- train_loss: 1.2443 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:18:57,475 - INFO - Epoch: 98/500, Iter: 1/119 -- train_loss: 1.2881 


[1/119]   1%|           [00:00<?]

2025-08-11 23:18:57,742 - INFO - Epoch: 98/500, Iter: 2/119 -- train_loss: 1.3576 
2025-08-11 23:18:58,124 - INFO - Epoch: 98/500, Iter: 3/119 -- train_loss: 1.3450 
2025-08-11 23:18:58,356 - INFO - Epoch: 98/500, Iter: 4/119 -- train_loss: 1.3398 
2025-08-11 23:18:58,606 - INFO - Epoch: 98/500, Iter: 5/119 -- train_loss: 1.3682 
2025-08-11 23:18:58,866 - INFO - Epoch: 98/500, Iter: 6/119 -- train_loss: 1.2648 
2025-08-11 23:18:59,123 - INFO - Epoch: 98/500, Iter: 7/119 -- train_loss: 1.2929 
2025-08-11 23:18:59,399 - INFO - Epoch: 98/500, Iter: 8/119 -- train_loss: 1.3958 
2025-08-11 23:19:02,157 - INFO - Epoch: 98/500, Iter: 9/119 -- train_loss: 1.3593 
2025-08-11 23:19:02,414 - INFO - Epoch: 98/500, Iter: 10/119 -- train_loss: 1.3172 
2025-08-11 23:19:02,695 - INFO - Epoch: 98/500, Iter: 11/119 -- train_loss: 1.3326 
2025-08-11 23:19:02,923 - INFO - Epoch: 98/500, Iter: 12/119 -- train_loss: 1.4015 
2025-08-11 23:19:03,173 - INFO - Epoch: 98/500, Iter: 13/119 -- train_loss: 1.3693 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:21:46,788 - INFO - Epoch: 99/500, Iter: 1/119 -- train_loss: 1.3673 


[1/119]   1%|           [00:00<?]

2025-08-11 23:21:47,093 - INFO - Epoch: 99/500, Iter: 2/119 -- train_loss: 1.4390 
2025-08-11 23:21:47,350 - INFO - Epoch: 99/500, Iter: 3/119 -- train_loss: 1.3761 
2025-08-11 23:21:48,575 - INFO - Epoch: 99/500, Iter: 4/119 -- train_loss: 1.3661 
2025-08-11 23:21:48,793 - INFO - Epoch: 99/500, Iter: 5/119 -- train_loss: 1.3874 
2025-08-11 23:21:49,046 - INFO - Epoch: 99/500, Iter: 6/119 -- train_loss: 1.3396 
2025-08-11 23:21:49,273 - INFO - Epoch: 99/500, Iter: 7/119 -- train_loss: 1.2825 
2025-08-11 23:21:49,504 - INFO - Epoch: 99/500, Iter: 8/119 -- train_loss: 1.4203 
2025-08-11 23:21:53,321 - INFO - Epoch: 99/500, Iter: 9/119 -- train_loss: 1.3743 
2025-08-11 23:21:53,551 - INFO - Epoch: 99/500, Iter: 10/119 -- train_loss: 1.2901 
2025-08-11 23:21:53,761 - INFO - Epoch: 99/500, Iter: 11/119 -- train_loss: 1.3089 
2025-08-11 23:21:53,994 - INFO - Epoch: 99/500, Iter: 12/119 -- train_loss: 1.2646 
2025-08-11 23:21:54,232 - INFO - Epoch: 99/500, Iter: 13/119 -- train_loss: 1.3922 


[1/20]   5%|5          [00:00<?]

2025-08-11 23:25:07,598 - INFO - Epoch: 100/500, Iter: 1/119 -- train_loss: 1.4659 


[1/119]   1%|           [00:00<?]

2025-08-11 23:25:08,253 - INFO - Epoch: 100/500, Iter: 2/119 -- train_loss: 1.3414 
2025-08-11 23:25:08,711 - INFO - Epoch: 100/500, Iter: 3/119 -- train_loss: 1.3061 
2025-08-11 23:25:09,165 - INFO - Epoch: 100/500, Iter: 4/119 -- train_loss: 1.3668 
2025-08-11 23:25:09,611 - INFO - Epoch: 100/500, Iter: 5/119 -- train_loss: 1.4405 
2025-08-11 23:25:10,022 - INFO - Epoch: 100/500, Iter: 6/119 -- train_loss: 1.4888 
2025-08-11 23:25:10,447 - INFO - Epoch: 100/500, Iter: 7/119 -- train_loss: 1.4547 
2025-08-11 23:25:10,927 - INFO - Epoch: 100/500, Iter: 8/119 -- train_loss: 1.4422 
2025-08-11 23:25:15,615 - INFO - Epoch: 100/500, Iter: 9/119 -- train_loss: 1.2938 
2025-08-11 23:25:16,197 - INFO - Epoch: 100/500, Iter: 10/119 -- train_loss: 1.3813 
2025-08-11 23:25:16,795 - INFO - Epoch: 100/500, Iter: 11/119 -- train_loss: 1.4383 
2025-08-11 23:25:17,123 - INFO - Epoch: 100/500, Iter: 12/119 -- train_loss: 1.3147 
2025-08-11 23:25:17,444 - INFO - Epoch: 100/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:28:45,130 - INFO - Epoch: 101/500, Iter: 1/119 -- train_loss: 1.3465 


[1/119]   1%|           [00:00<?]

2025-08-11 23:28:45,523 - INFO - Epoch: 101/500, Iter: 2/119 -- train_loss: 1.2450 
2025-08-11 23:28:46,102 - INFO - Epoch: 101/500, Iter: 3/119 -- train_loss: 1.3206 
2025-08-11 23:28:49,442 - INFO - Epoch: 101/500, Iter: 4/119 -- train_loss: 1.3196 
2025-08-11 23:28:49,713 - INFO - Epoch: 101/500, Iter: 5/119 -- train_loss: 1.2834 
2025-08-11 23:28:49,978 - INFO - Epoch: 101/500, Iter: 6/119 -- train_loss: 1.3007 
2025-08-11 23:28:50,241 - INFO - Epoch: 101/500, Iter: 7/119 -- train_loss: 1.4457 
2025-08-11 23:28:50,513 - INFO - Epoch: 101/500, Iter: 8/119 -- train_loss: 1.3410 
2025-08-11 23:28:51,622 - INFO - Epoch: 101/500, Iter: 9/119 -- train_loss: 1.4021 
2025-08-11 23:28:51,943 - INFO - Epoch: 101/500, Iter: 10/119 -- train_loss: 1.3082 
2025-08-11 23:28:52,196 - INFO - Epoch: 101/500, Iter: 11/119 -- train_loss: 1.3098 
2025-08-11 23:28:54,743 - INFO - Epoch: 101/500, Iter: 12/119 -- train_loss: 1.2680 
2025-08-11 23:28:55,035 - INFO - Epoch: 101/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:32:11,421 - INFO - Epoch: 102/500, Iter: 1/119 -- train_loss: 1.2754 


[1/119]   1%|           [00:00<?]

2025-08-11 23:32:12,048 - INFO - Epoch: 102/500, Iter: 2/119 -- train_loss: 1.3881 
2025-08-11 23:32:13,687 - INFO - Epoch: 102/500, Iter: 3/119 -- train_loss: 1.4266 
2025-08-11 23:32:13,904 - INFO - Epoch: 102/500, Iter: 4/119 -- train_loss: 1.3955 
2025-08-11 23:32:14,128 - INFO - Epoch: 102/500, Iter: 5/119 -- train_loss: 1.3559 
2025-08-11 23:32:14,363 - INFO - Epoch: 102/500, Iter: 6/119 -- train_loss: 1.3224 
2025-08-11 23:32:14,617 - INFO - Epoch: 102/500, Iter: 7/119 -- train_loss: 1.3050 
2025-08-11 23:32:14,847 - INFO - Epoch: 102/500, Iter: 8/119 -- train_loss: 1.2750 
2025-08-11 23:32:15,974 - INFO - Epoch: 102/500, Iter: 9/119 -- train_loss: 1.3603 
2025-08-11 23:32:16,222 - INFO - Epoch: 102/500, Iter: 10/119 -- train_loss: 1.2489 
2025-08-11 23:32:16,474 - INFO - Epoch: 102/500, Iter: 11/119 -- train_loss: 1.2905 
2025-08-11 23:32:16,709 - INFO - Epoch: 102/500, Iter: 12/119 -- train_loss: 1.2877 
2025-08-11 23:32:16,936 - INFO - Epoch: 102/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:35:23,339 - INFO - Epoch: 103/500, Iter: 1/119 -- train_loss: 1.3169 


[1/119]   1%|           [00:00<?]

2025-08-11 23:35:23,568 - INFO - Epoch: 103/500, Iter: 2/119 -- train_loss: 1.3424 
2025-08-11 23:35:24,139 - INFO - Epoch: 103/500, Iter: 3/119 -- train_loss: 1.3736 
2025-08-11 23:35:24,366 - INFO - Epoch: 103/500, Iter: 4/119 -- train_loss: 1.3835 
2025-08-11 23:35:24,599 - INFO - Epoch: 103/500, Iter: 5/119 -- train_loss: 1.3015 
2025-08-11 23:35:26,426 - INFO - Epoch: 103/500, Iter: 6/119 -- train_loss: 1.4249 
2025-08-11 23:35:26,628 - INFO - Epoch: 103/500, Iter: 7/119 -- train_loss: 1.2709 
2025-08-11 23:35:26,882 - INFO - Epoch: 103/500, Iter: 8/119 -- train_loss: 1.3862 
2025-08-11 23:35:27,128 - INFO - Epoch: 103/500, Iter: 9/119 -- train_loss: 1.3494 
2025-08-11 23:35:28,583 - INFO - Epoch: 103/500, Iter: 10/119 -- train_loss: 1.3415 
2025-08-11 23:35:28,806 - INFO - Epoch: 103/500, Iter: 11/119 -- train_loss: 1.2953 
2025-08-11 23:35:29,050 - INFO - Epoch: 103/500, Iter: 12/119 -- train_loss: 1.3421 
2025-08-11 23:35:29,293 - INFO - Epoch: 103/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:37:59,889 - INFO - Epoch: 104/500, Iter: 1/119 -- train_loss: 1.4558 


[1/119]   1%|           [00:00<?]

2025-08-11 23:38:00,232 - INFO - Epoch: 104/500, Iter: 2/119 -- train_loss: 1.3879 
2025-08-11 23:38:00,987 - INFO - Epoch: 104/500, Iter: 3/119 -- train_loss: 1.4214 
2025-08-11 23:38:04,221 - INFO - Epoch: 104/500, Iter: 4/119 -- train_loss: 1.3030 
2025-08-11 23:38:04,443 - INFO - Epoch: 104/500, Iter: 5/119 -- train_loss: 1.2622 
2025-08-11 23:38:04,689 - INFO - Epoch: 104/500, Iter: 6/119 -- train_loss: 1.2655 
2025-08-11 23:38:04,889 - INFO - Epoch: 104/500, Iter: 7/119 -- train_loss: 1.3500 
2025-08-11 23:38:05,109 - INFO - Epoch: 104/500, Iter: 8/119 -- train_loss: 1.3181 
2025-08-11 23:38:05,342 - INFO - Epoch: 104/500, Iter: 9/119 -- train_loss: 1.2718 
2025-08-11 23:38:05,565 - INFO - Epoch: 104/500, Iter: 10/119 -- train_loss: 1.4043 
2025-08-11 23:38:05,820 - INFO - Epoch: 104/500, Iter: 11/119 -- train_loss: 1.3499 
2025-08-11 23:38:06,187 - INFO - Epoch: 104/500, Iter: 12/119 -- train_loss: 1.4248 
2025-08-11 23:38:06,422 - INFO - Epoch: 104/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:40:38,473 - INFO - Epoch: 105/500, Iter: 1/119 -- train_loss: 1.3423 


[1/119]   1%|           [00:00<?]

2025-08-11 23:40:38,698 - INFO - Epoch: 105/500, Iter: 2/119 -- train_loss: 1.3982 
2025-08-11 23:40:38,943 - INFO - Epoch: 105/500, Iter: 3/119 -- train_loss: 1.2833 
2025-08-11 23:40:39,165 - INFO - Epoch: 105/500, Iter: 4/119 -- train_loss: 1.3384 
2025-08-11 23:40:39,387 - INFO - Epoch: 105/500, Iter: 5/119 -- train_loss: 1.3447 
2025-08-11 23:40:39,618 - INFO - Epoch: 105/500, Iter: 6/119 -- train_loss: 1.3850 
2025-08-11 23:40:39,851 - INFO - Epoch: 105/500, Iter: 7/119 -- train_loss: 1.4373 
2025-08-11 23:40:40,064 - INFO - Epoch: 105/500, Iter: 8/119 -- train_loss: 1.3453 
2025-08-11 23:40:43,991 - INFO - Epoch: 105/500, Iter: 9/119 -- train_loss: 1.3232 
2025-08-11 23:40:44,186 - INFO - Epoch: 105/500, Iter: 10/119 -- train_loss: 1.3747 
2025-08-11 23:40:44,378 - INFO - Epoch: 105/500, Iter: 11/119 -- train_loss: 1.6127 
2025-08-11 23:40:44,580 - INFO - Epoch: 105/500, Iter: 12/119 -- train_loss: 1.3698 
2025-08-11 23:40:44,795 - INFO - Epoch: 105/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:43:21,225 - INFO - Epoch: 106/500, Iter: 1/119 -- train_loss: 1.3572 


[1/119]   1%|           [00:00<?]

2025-08-11 23:43:21,433 - INFO - Epoch: 106/500, Iter: 2/119 -- train_loss: 1.3322 
2025-08-11 23:43:22,357 - INFO - Epoch: 106/500, Iter: 3/119 -- train_loss: 1.3255 
2025-08-11 23:43:22,555 - INFO - Epoch: 106/500, Iter: 4/119 -- train_loss: 1.4174 
2025-08-11 23:43:22,756 - INFO - Epoch: 106/500, Iter: 5/119 -- train_loss: 1.3092 
2025-08-11 23:43:23,113 - INFO - Epoch: 106/500, Iter: 6/119 -- train_loss: 1.4607 
2025-08-11 23:43:23,312 - INFO - Epoch: 106/500, Iter: 7/119 -- train_loss: 1.3732 
2025-08-11 23:43:23,515 - INFO - Epoch: 106/500, Iter: 8/119 -- train_loss: 1.3917 
2025-08-11 23:43:24,649 - INFO - Epoch: 106/500, Iter: 9/119 -- train_loss: 1.2255 
2025-08-11 23:43:24,853 - INFO - Epoch: 106/500, Iter: 10/119 -- train_loss: 1.2874 
2025-08-11 23:43:29,171 - INFO - Epoch: 106/500, Iter: 11/119 -- train_loss: 1.6870 
2025-08-11 23:43:29,370 - INFO - Epoch: 106/500, Iter: 12/119 -- train_loss: 1.2623 
2025-08-11 23:43:29,568 - INFO - Epoch: 106/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:46:02,989 - INFO - Epoch: 107/500, Iter: 1/119 -- train_loss: 1.4266 


[1/119]   1%|           [00:00<?]

2025-08-11 23:46:03,197 - INFO - Epoch: 107/500, Iter: 2/119 -- train_loss: 1.3952 
2025-08-11 23:46:03,418 - INFO - Epoch: 107/500, Iter: 3/119 -- train_loss: 1.3108 
2025-08-11 23:46:03,645 - INFO - Epoch: 107/500, Iter: 4/119 -- train_loss: 1.2421 
2025-08-11 23:46:04,868 - INFO - Epoch: 107/500, Iter: 5/119 -- train_loss: 1.4029 
2025-08-11 23:46:05,091 - INFO - Epoch: 107/500, Iter: 6/119 -- train_loss: 1.3941 
2025-08-11 23:46:05,298 - INFO - Epoch: 107/500, Iter: 7/119 -- train_loss: 1.3537 
2025-08-11 23:46:05,522 - INFO - Epoch: 107/500, Iter: 8/119 -- train_loss: 1.4117 
2025-08-11 23:46:09,297 - INFO - Epoch: 107/500, Iter: 9/119 -- train_loss: 1.4065 
2025-08-11 23:46:09,495 - INFO - Epoch: 107/500, Iter: 10/119 -- train_loss: 1.5289 
2025-08-11 23:46:09,699 - INFO - Epoch: 107/500, Iter: 11/119 -- train_loss: 1.3993 
2025-08-11 23:46:09,911 - INFO - Epoch: 107/500, Iter: 12/119 -- train_loss: 1.4238 
2025-08-11 23:46:10,136 - INFO - Epoch: 107/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:48:57,155 - INFO - Epoch: 108/500, Iter: 1/119 -- train_loss: 1.3299 


[1/119]   1%|           [00:00<?]

2025-08-11 23:48:58,613 - INFO - Epoch: 108/500, Iter: 2/119 -- train_loss: 1.4040 
2025-08-11 23:48:58,879 - INFO - Epoch: 108/500, Iter: 3/119 -- train_loss: 1.3626 
2025-08-11 23:48:59,108 - INFO - Epoch: 108/500, Iter: 4/119 -- train_loss: 1.3813 
2025-08-11 23:48:59,348 - INFO - Epoch: 108/500, Iter: 5/119 -- train_loss: 1.3545 
2025-08-11 23:48:59,551 - INFO - Epoch: 108/500, Iter: 6/119 -- train_loss: 1.3636 
2025-08-11 23:48:59,772 - INFO - Epoch: 108/500, Iter: 7/119 -- train_loss: 1.2658 
2025-08-11 23:49:00,001 - INFO - Epoch: 108/500, Iter: 8/119 -- train_loss: 1.3409 
2025-08-11 23:49:00,384 - INFO - Epoch: 108/500, Iter: 9/119 -- train_loss: 1.3299 
2025-08-11 23:49:02,090 - INFO - Epoch: 108/500, Iter: 10/119 -- train_loss: 1.4057 
2025-08-11 23:49:02,825 - INFO - Epoch: 108/500, Iter: 11/119 -- train_loss: 1.3187 
2025-08-11 23:49:03,054 - INFO - Epoch: 108/500, Iter: 12/119 -- train_loss: 1.3025 
2025-08-11 23:49:03,277 - INFO - Epoch: 108/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:51:39,716 - INFO - Epoch: 109/500, Iter: 1/119 -- train_loss: 1.3457 


[1/119]   1%|           [00:00<?]

2025-08-11 23:51:39,971 - INFO - Epoch: 109/500, Iter: 2/119 -- train_loss: 1.3146 
2025-08-11 23:51:40,224 - INFO - Epoch: 109/500, Iter: 3/119 -- train_loss: 1.4361 
2025-08-11 23:51:40,478 - INFO - Epoch: 109/500, Iter: 4/119 -- train_loss: 1.3620 
2025-08-11 23:51:40,741 - INFO - Epoch: 109/500, Iter: 5/119 -- train_loss: 1.3229 
2025-08-11 23:51:40,973 - INFO - Epoch: 109/500, Iter: 6/119 -- train_loss: 1.4372 
2025-08-11 23:51:41,185 - INFO - Epoch: 109/500, Iter: 7/119 -- train_loss: 1.3328 
2025-08-11 23:51:41,419 - INFO - Epoch: 109/500, Iter: 8/119 -- train_loss: 1.4270 
2025-08-11 23:51:43,653 - INFO - Epoch: 109/500, Iter: 9/119 -- train_loss: 1.2953 
2025-08-11 23:51:43,907 - INFO - Epoch: 109/500, Iter: 10/119 -- train_loss: 1.2571 
2025-08-11 23:51:44,113 - INFO - Epoch: 109/500, Iter: 11/119 -- train_loss: 1.3769 
2025-08-11 23:51:44,324 - INFO - Epoch: 109/500, Iter: 12/119 -- train_loss: 1.2464 
2025-08-11 23:51:44,553 - INFO - Epoch: 109/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:54:20,847 - INFO - Epoch: 110/500, Iter: 1/119 -- train_loss: 1.3576 


[1/119]   1%|           [00:00<?]

2025-08-11 23:54:21,075 - INFO - Epoch: 110/500, Iter: 2/119 -- train_loss: 1.3897 
2025-08-11 23:54:21,317 - INFO - Epoch: 110/500, Iter: 3/119 -- train_loss: 1.3559 
2025-08-11 23:54:21,548 - INFO - Epoch: 110/500, Iter: 4/119 -- train_loss: 1.2885 
2025-08-11 23:54:21,753 - INFO - Epoch: 110/500, Iter: 5/119 -- train_loss: 1.3123 
2025-08-11 23:54:22,672 - INFO - Epoch: 110/500, Iter: 6/119 -- train_loss: 1.3619 
2025-08-11 23:54:22,888 - INFO - Epoch: 110/500, Iter: 7/119 -- train_loss: 1.3744 
2025-08-11 23:54:23,116 - INFO - Epoch: 110/500, Iter: 8/119 -- train_loss: 1.3020 
2025-08-11 23:54:23,339 - INFO - Epoch: 110/500, Iter: 9/119 -- train_loss: 1.2510 
2025-08-11 23:54:24,106 - INFO - Epoch: 110/500, Iter: 10/119 -- train_loss: 1.4322 
2025-08-11 23:54:24,359 - INFO - Epoch: 110/500, Iter: 11/119 -- train_loss: 1.2794 
2025-08-11 23:54:24,585 - INFO - Epoch: 110/500, Iter: 12/119 -- train_loss: 1.3626 
2025-08-11 23:54:24,821 - INFO - Epoch: 110/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:57:05,360 - INFO - Epoch: 111/500, Iter: 1/119 -- train_loss: 1.3675 


[1/119]   1%|           [00:00<?]

2025-08-11 23:57:05,600 - INFO - Epoch: 111/500, Iter: 2/119 -- train_loss: 1.4138 
2025-08-11 23:57:05,802 - INFO - Epoch: 111/500, Iter: 3/119 -- train_loss: 1.2967 
2025-08-11 23:57:06,023 - INFO - Epoch: 111/500, Iter: 4/119 -- train_loss: 1.3737 
2025-08-11 23:57:06,232 - INFO - Epoch: 111/500, Iter: 5/119 -- train_loss: 1.3695 
2025-08-11 23:57:06,451 - INFO - Epoch: 111/500, Iter: 6/119 -- train_loss: 1.2963 
2025-08-11 23:57:06,688 - INFO - Epoch: 111/500, Iter: 7/119 -- train_loss: 1.2379 
2025-08-11 23:57:06,928 - INFO - Epoch: 111/500, Iter: 8/119 -- train_loss: 1.2532 
2025-08-11 23:57:08,309 - INFO - Epoch: 111/500, Iter: 9/119 -- train_loss: 1.3494 
2025-08-11 23:57:08,548 - INFO - Epoch: 111/500, Iter: 10/119 -- train_loss: 1.4253 
2025-08-11 23:57:08,770 - INFO - Epoch: 111/500, Iter: 11/119 -- train_loss: 1.3363 
2025-08-11 23:57:08,985 - INFO - Epoch: 111/500, Iter: 12/119 -- train_loss: 1.2591 
2025-08-11 23:57:09,201 - INFO - Epoch: 111/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 23:59:43,335 - INFO - Epoch: 112/500, Iter: 1/119 -- train_loss: 1.2232 


[1/119]   1%|           [00:00<?]

2025-08-11 23:59:45,134 - INFO - Epoch: 112/500, Iter: 2/119 -- train_loss: 1.3996 
2025-08-11 23:59:45,370 - INFO - Epoch: 112/500, Iter: 3/119 -- train_loss: 1.3546 
2025-08-11 23:59:45,580 - INFO - Epoch: 112/500, Iter: 4/119 -- train_loss: 1.4097 
2025-08-11 23:59:45,805 - INFO - Epoch: 112/500, Iter: 5/119 -- train_loss: 1.3712 
2025-08-11 23:59:46,053 - INFO - Epoch: 112/500, Iter: 6/119 -- train_loss: 1.3623 
2025-08-11 23:59:46,280 - INFO - Epoch: 112/500, Iter: 7/119 -- train_loss: 1.3559 
2025-08-11 23:59:46,487 - INFO - Epoch: 112/500, Iter: 8/119 -- train_loss: 1.3423 
2025-08-11 23:59:46,741 - INFO - Epoch: 112/500, Iter: 9/119 -- train_loss: 1.3195 
2025-08-11 23:59:48,341 - INFO - Epoch: 112/500, Iter: 10/119 -- train_loss: 1.2947 
2025-08-11 23:59:49,451 - INFO - Epoch: 112/500, Iter: 11/119 -- train_loss: 1.2821 
2025-08-11 23:59:49,664 - INFO - Epoch: 112/500, Iter: 12/119 -- train_loss: 1.2986 
2025-08-11 23:59:49,882 - INFO - Epoch: 112/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:02:19,234 - INFO - Epoch: 113/500, Iter: 1/119 -- train_loss: 1.4094 


[1/119]   1%|           [00:00<?]

2025-08-12 00:02:19,472 - INFO - Epoch: 113/500, Iter: 2/119 -- train_loss: 1.3587 
2025-08-12 00:02:19,693 - INFO - Epoch: 113/500, Iter: 3/119 -- train_loss: 1.3350 
2025-08-12 00:02:19,919 - INFO - Epoch: 113/500, Iter: 4/119 -- train_loss: 1.2897 
2025-08-12 00:02:20,155 - INFO - Epoch: 113/500, Iter: 5/119 -- train_loss: 1.3715 
2025-08-12 00:02:20,752 - INFO - Epoch: 113/500, Iter: 6/119 -- train_loss: 1.3911 
2025-08-12 00:02:20,989 - INFO - Epoch: 113/500, Iter: 7/119 -- train_loss: 1.3903 
2025-08-12 00:02:21,236 - INFO - Epoch: 113/500, Iter: 8/119 -- train_loss: 1.3543 
2025-08-12 00:02:24,999 - INFO - Epoch: 113/500, Iter: 9/119 -- train_loss: 1.4935 
2025-08-12 00:02:25,233 - INFO - Epoch: 113/500, Iter: 10/119 -- train_loss: 1.4701 
2025-08-12 00:02:25,473 - INFO - Epoch: 113/500, Iter: 11/119 -- train_loss: 1.2941 
2025-08-12 00:02:25,692 - INFO - Epoch: 113/500, Iter: 12/119 -- train_loss: 1.3365 
2025-08-12 00:02:25,908 - INFO - Epoch: 113/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:04:55,686 - INFO - Epoch: 114/500, Iter: 1/119 -- train_loss: 1.3088 


[1/119]   1%|           [00:00<?]

2025-08-12 00:04:55,955 - INFO - Epoch: 114/500, Iter: 2/119 -- train_loss: 1.3274 
2025-08-12 00:04:56,964 - INFO - Epoch: 114/500, Iter: 3/119 -- train_loss: 1.5117 
2025-08-12 00:04:57,198 - INFO - Epoch: 114/500, Iter: 4/119 -- train_loss: 1.2840 
2025-08-12 00:04:58,501 - INFO - Epoch: 114/500, Iter: 5/119 -- train_loss: 1.4058 
2025-08-12 00:04:58,710 - INFO - Epoch: 114/500, Iter: 6/119 -- train_loss: 1.3139 
2025-08-12 00:04:58,915 - INFO - Epoch: 114/500, Iter: 7/119 -- train_loss: 1.3030 
2025-08-12 00:04:59,163 - INFO - Epoch: 114/500, Iter: 8/119 -- train_loss: 1.4680 
2025-08-12 00:04:59,384 - INFO - Epoch: 114/500, Iter: 9/119 -- train_loss: 1.4947 
2025-08-12 00:05:00,140 - INFO - Epoch: 114/500, Iter: 10/119 -- train_loss: 1.3245 
2025-08-12 00:05:00,370 - INFO - Epoch: 114/500, Iter: 11/119 -- train_loss: 1.3795 
2025-08-12 00:05:00,616 - INFO - Epoch: 114/500, Iter: 12/119 -- train_loss: 1.3162 
2025-08-12 00:05:01,098 - INFO - Epoch: 114/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:07:33,609 - INFO - Epoch: 115/500, Iter: 1/119 -- train_loss: 1.3044 


[1/119]   1%|           [00:00<?]

2025-08-12 00:07:34,752 - INFO - Epoch: 115/500, Iter: 2/119 -- train_loss: 1.4140 
2025-08-12 00:07:34,955 - INFO - Epoch: 115/500, Iter: 3/119 -- train_loss: 1.2778 
2025-08-12 00:07:35,969 - INFO - Epoch: 115/500, Iter: 4/119 -- train_loss: 1.4235 
2025-08-12 00:07:36,181 - INFO - Epoch: 115/500, Iter: 5/119 -- train_loss: 1.2993 
2025-08-12 00:07:36,413 - INFO - Epoch: 115/500, Iter: 6/119 -- train_loss: 1.4075 
2025-08-12 00:07:36,638 - INFO - Epoch: 115/500, Iter: 7/119 -- train_loss: 1.4095 
2025-08-12 00:07:36,869 - INFO - Epoch: 115/500, Iter: 8/119 -- train_loss: 1.2893 
2025-08-12 00:07:37,107 - INFO - Epoch: 115/500, Iter: 9/119 -- train_loss: 1.2887 
2025-08-12 00:07:38,098 - INFO - Epoch: 115/500, Iter: 10/119 -- train_loss: 1.4235 
2025-08-12 00:07:38,319 - INFO - Epoch: 115/500, Iter: 11/119 -- train_loss: 1.2988 
2025-08-12 00:07:38,539 - INFO - Epoch: 115/500, Iter: 12/119 -- train_loss: 1.3758 
2025-08-12 00:07:38,792 - INFO - Epoch: 115/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:10:20,869 - INFO - Epoch: 116/500, Iter: 1/119 -- train_loss: 1.4405 


[1/119]   1%|           [00:00<?]

2025-08-12 00:10:21,072 - INFO - Epoch: 116/500, Iter: 2/119 -- train_loss: 1.4315 
2025-08-12 00:10:21,293 - INFO - Epoch: 116/500, Iter: 3/119 -- train_loss: 1.3111 
2025-08-12 00:10:21,513 - INFO - Epoch: 116/500, Iter: 4/119 -- train_loss: 1.3945 
2025-08-12 00:10:21,712 - INFO - Epoch: 116/500, Iter: 5/119 -- train_loss: 1.3202 
2025-08-12 00:10:21,913 - INFO - Epoch: 116/500, Iter: 6/119 -- train_loss: 1.3281 
2025-08-12 00:10:22,115 - INFO - Epoch: 116/500, Iter: 7/119 -- train_loss: 1.2949 
2025-08-12 00:10:22,307 - INFO - Epoch: 116/500, Iter: 8/119 -- train_loss: 1.3616 
2025-08-12 00:10:24,111 - INFO - Epoch: 116/500, Iter: 9/119 -- train_loss: 1.4028 
2025-08-12 00:10:24,324 - INFO - Epoch: 116/500, Iter: 10/119 -- train_loss: 1.3549 
2025-08-12 00:10:24,606 - INFO - Epoch: 116/500, Iter: 11/119 -- train_loss: 1.3532 
2025-08-12 00:10:24,809 - INFO - Epoch: 116/500, Iter: 12/119 -- train_loss: 1.4229 
2025-08-12 00:10:25,033 - INFO - Epoch: 116/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:13:07,813 - INFO - Epoch: 117/500, Iter: 1/119 -- train_loss: 1.2872 


[1/119]   1%|           [00:00<?]

2025-08-12 00:13:08,019 - INFO - Epoch: 117/500, Iter: 2/119 -- train_loss: 1.3844 
2025-08-12 00:13:08,256 - INFO - Epoch: 117/500, Iter: 3/119 -- train_loss: 1.4170 
2025-08-12 00:13:08,461 - INFO - Epoch: 117/500, Iter: 4/119 -- train_loss: 1.2881 
2025-08-12 00:13:08,683 - INFO - Epoch: 117/500, Iter: 5/119 -- train_loss: 1.3582 
2025-08-12 00:13:08,871 - INFO - Epoch: 117/500, Iter: 6/119 -- train_loss: 1.2986 
2025-08-12 00:13:09,091 - INFO - Epoch: 117/500, Iter: 7/119 -- train_loss: 1.4138 
2025-08-12 00:13:09,303 - INFO - Epoch: 117/500, Iter: 8/119 -- train_loss: 1.3022 
2025-08-12 00:13:10,050 - INFO - Epoch: 117/500, Iter: 9/119 -- train_loss: 1.5151 
2025-08-12 00:13:13,799 - INFO - Epoch: 117/500, Iter: 10/119 -- train_loss: 1.2773 
2025-08-12 00:13:14,001 - INFO - Epoch: 117/500, Iter: 11/119 -- train_loss: 1.3011 
2025-08-12 00:13:14,214 - INFO - Epoch: 117/500, Iter: 12/119 -- train_loss: 1.4437 
2025-08-12 00:13:14,416 - INFO - Epoch: 117/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:15:48,424 - INFO - Epoch: 118/500, Iter: 1/119 -- train_loss: 1.3645 


[1/119]   1%|           [00:00<?]

2025-08-12 00:15:51,410 - INFO - Epoch: 118/500, Iter: 2/119 -- train_loss: 1.3577 
2025-08-12 00:15:51,634 - INFO - Epoch: 118/500, Iter: 3/119 -- train_loss: 1.3123 
2025-08-12 00:15:51,836 - INFO - Epoch: 118/500, Iter: 4/119 -- train_loss: 1.3920 
2025-08-12 00:15:52,048 - INFO - Epoch: 118/500, Iter: 5/119 -- train_loss: 1.4464 
2025-08-12 00:15:52,258 - INFO - Epoch: 118/500, Iter: 6/119 -- train_loss: 1.5032 
2025-08-12 00:15:52,471 - INFO - Epoch: 118/500, Iter: 7/119 -- train_loss: 1.2374 
2025-08-12 00:15:52,663 - INFO - Epoch: 118/500, Iter: 8/119 -- train_loss: 1.4607 
2025-08-12 00:15:52,874 - INFO - Epoch: 118/500, Iter: 9/119 -- train_loss: 1.3767 
2025-08-12 00:15:57,122 - INFO - Epoch: 118/500, Iter: 10/119 -- train_loss: 1.4338 
2025-08-12 00:15:57,324 - INFO - Epoch: 118/500, Iter: 11/119 -- train_loss: 1.3961 
2025-08-12 00:15:57,550 - INFO - Epoch: 118/500, Iter: 12/119 -- train_loss: 1.3137 
2025-08-12 00:15:57,746 - INFO - Epoch: 118/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:18:37,550 - INFO - Epoch: 119/500, Iter: 1/119 -- train_loss: 1.2543 


[1/119]   1%|           [00:00<?]

2025-08-12 00:18:37,791 - INFO - Epoch: 119/500, Iter: 2/119 -- train_loss: 1.3746 
2025-08-12 00:18:38,020 - INFO - Epoch: 119/500, Iter: 3/119 -- train_loss: 1.3900 
2025-08-12 00:18:38,240 - INFO - Epoch: 119/500, Iter: 4/119 -- train_loss: 1.2483 
2025-08-12 00:18:38,439 - INFO - Epoch: 119/500, Iter: 5/119 -- train_loss: 1.3325 
2025-08-12 00:18:38,629 - INFO - Epoch: 119/500, Iter: 6/119 -- train_loss: 1.4451 
2025-08-12 00:18:38,864 - INFO - Epoch: 119/500, Iter: 7/119 -- train_loss: 1.4183 
2025-08-12 00:18:39,062 - INFO - Epoch: 119/500, Iter: 8/119 -- train_loss: 1.4205 
2025-08-12 00:18:39,611 - INFO - Epoch: 119/500, Iter: 9/119 -- train_loss: 1.3538 
2025-08-12 00:18:39,844 - INFO - Epoch: 119/500, Iter: 10/119 -- train_loss: 1.3493 
2025-08-12 00:18:40,086 - INFO - Epoch: 119/500, Iter: 11/119 -- train_loss: 1.3706 
2025-08-12 00:18:40,337 - INFO - Epoch: 119/500, Iter: 12/119 -- train_loss: 1.3359 
2025-08-12 00:18:40,539 - INFO - Epoch: 119/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:21:18,336 - INFO - Epoch: 120/500, Iter: 1/119 -- train_loss: 1.2960 


[1/119]   1%|           [00:00<?]

2025-08-12 00:21:18,705 - INFO - Epoch: 120/500, Iter: 2/119 -- train_loss: 1.4047 
2025-08-12 00:21:19,059 - INFO - Epoch: 120/500, Iter: 3/119 -- train_loss: 1.4804 
2025-08-12 00:21:19,302 - INFO - Epoch: 120/500, Iter: 4/119 -- train_loss: 1.4648 
2025-08-12 00:21:19,495 - INFO - Epoch: 120/500, Iter: 5/119 -- train_loss: 1.3955 
2025-08-12 00:21:19,707 - INFO - Epoch: 120/500, Iter: 6/119 -- train_loss: 1.6622 
2025-08-12 00:21:19,959 - INFO - Epoch: 120/500, Iter: 7/119 -- train_loss: 1.3850 
2025-08-12 00:21:20,212 - INFO - Epoch: 120/500, Iter: 8/119 -- train_loss: 1.2847 
2025-08-12 00:21:23,775 - INFO - Epoch: 120/500, Iter: 9/119 -- train_loss: 1.3291 
2025-08-12 00:21:23,989 - INFO - Epoch: 120/500, Iter: 10/119 -- train_loss: 1.3345 
2025-08-12 00:21:24,181 - INFO - Epoch: 120/500, Iter: 11/119 -- train_loss: 1.3553 
2025-08-12 00:21:24,373 - INFO - Epoch: 120/500, Iter: 12/119 -- train_loss: 1.2446 
2025-08-12 00:21:24,565 - INFO - Epoch: 120/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:24:04,121 - INFO - Epoch: 121/500, Iter: 1/119 -- train_loss: 1.3876 


[1/119]   1%|           [00:00<?]

2025-08-12 00:24:06,940 - INFO - Epoch: 121/500, Iter: 2/119 -- train_loss: 1.4422 
2025-08-12 00:24:07,132 - INFO - Epoch: 121/500, Iter: 3/119 -- train_loss: 1.3788 
2025-08-12 00:24:07,336 - INFO - Epoch: 121/500, Iter: 4/119 -- train_loss: 1.3500 
2025-08-12 00:24:07,841 - INFO - Epoch: 121/500, Iter: 5/119 -- train_loss: 1.3757 
2025-08-12 00:24:08,031 - INFO - Epoch: 121/500, Iter: 6/119 -- train_loss: 1.4720 
2025-08-12 00:24:08,236 - INFO - Epoch: 121/500, Iter: 7/119 -- train_loss: 1.3046 
2025-08-12 00:24:08,461 - INFO - Epoch: 121/500, Iter: 8/119 -- train_loss: 1.5285 
2025-08-12 00:24:08,676 - INFO - Epoch: 121/500, Iter: 9/119 -- train_loss: 1.2617 
2025-08-12 00:24:09,756 - INFO - Epoch: 121/500, Iter: 10/119 -- train_loss: 1.3507 
2025-08-12 00:24:09,959 - INFO - Epoch: 121/500, Iter: 11/119 -- train_loss: 1.2989 
2025-08-12 00:24:10,160 - INFO - Epoch: 121/500, Iter: 12/119 -- train_loss: 1.2545 
2025-08-12 00:24:10,366 - INFO - Epoch: 121/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:26:51,594 - INFO - Epoch: 122/500, Iter: 1/119 -- train_loss: 1.4324 


[1/119]   1%|           [00:00<?]

2025-08-12 00:26:51,803 - INFO - Epoch: 122/500, Iter: 2/119 -- train_loss: 1.2996 
2025-08-12 00:26:52,024 - INFO - Epoch: 122/500, Iter: 3/119 -- train_loss: 1.2418 
2025-08-12 00:26:52,231 - INFO - Epoch: 122/500, Iter: 4/119 -- train_loss: 1.2361 
2025-08-12 00:26:52,449 - INFO - Epoch: 122/500, Iter: 5/119 -- train_loss: 1.4368 
2025-08-12 00:26:52,678 - INFO - Epoch: 122/500, Iter: 6/119 -- train_loss: 1.3028 
2025-08-12 00:26:52,893 - INFO - Epoch: 122/500, Iter: 7/119 -- train_loss: 1.4043 
2025-08-12 00:26:53,957 - INFO - Epoch: 122/500, Iter: 8/119 -- train_loss: 1.3326 
2025-08-12 00:26:55,335 - INFO - Epoch: 122/500, Iter: 9/119 -- train_loss: 1.3228 
2025-08-12 00:26:55,529 - INFO - Epoch: 122/500, Iter: 10/119 -- train_loss: 1.4294 
2025-08-12 00:26:57,903 - INFO - Epoch: 122/500, Iter: 11/119 -- train_loss: 1.3875 
2025-08-12 00:26:58,119 - INFO - Epoch: 122/500, Iter: 12/119 -- train_loss: 1.2443 
2025-08-12 00:26:58,345 - INFO - Epoch: 122/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:29:42,589 - INFO - Epoch: 123/500, Iter: 1/119 -- train_loss: 1.4345 


[1/119]   1%|           [00:00<?]

2025-08-12 00:29:45,482 - INFO - Epoch: 123/500, Iter: 2/119 -- train_loss: 1.4051 
2025-08-12 00:29:45,711 - INFO - Epoch: 123/500, Iter: 3/119 -- train_loss: 1.2699 
2025-08-12 00:29:45,929 - INFO - Epoch: 123/500, Iter: 4/119 -- train_loss: 1.3937 
2025-08-12 00:29:46,126 - INFO - Epoch: 123/500, Iter: 5/119 -- train_loss: 1.4904 
2025-08-12 00:29:46,317 - INFO - Epoch: 123/500, Iter: 6/119 -- train_loss: 1.3504 
2025-08-12 00:29:46,526 - INFO - Epoch: 123/500, Iter: 7/119 -- train_loss: 1.3767 
2025-08-12 00:29:46,721 - INFO - Epoch: 123/500, Iter: 8/119 -- train_loss: 1.3026 
2025-08-12 00:29:46,935 - INFO - Epoch: 123/500, Iter: 9/119 -- train_loss: 1.3233 
2025-08-12 00:29:49,133 - INFO - Epoch: 123/500, Iter: 10/119 -- train_loss: 1.3023 
2025-08-12 00:29:49,377 - INFO - Epoch: 123/500, Iter: 11/119 -- train_loss: 1.3245 
2025-08-12 00:29:49,584 - INFO - Epoch: 123/500, Iter: 12/119 -- train_loss: 1.4037 
2025-08-12 00:29:49,777 - INFO - Epoch: 123/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:32:34,954 - INFO - Epoch: 124/500, Iter: 1/119 -- train_loss: 1.3053 


[1/119]   1%|           [00:00<?]

2025-08-12 00:32:35,153 - INFO - Epoch: 124/500, Iter: 2/119 -- train_loss: 1.3492 
2025-08-12 00:32:36,037 - INFO - Epoch: 124/500, Iter: 3/119 -- train_loss: 1.4508 
2025-08-12 00:32:36,330 - INFO - Epoch: 124/500, Iter: 4/119 -- train_loss: 1.4567 
2025-08-12 00:32:36,559 - INFO - Epoch: 124/500, Iter: 5/119 -- train_loss: 1.2782 
2025-08-12 00:32:36,794 - INFO - Epoch: 124/500, Iter: 6/119 -- train_loss: 1.3511 
2025-08-12 00:32:37,016 - INFO - Epoch: 124/500, Iter: 7/119 -- train_loss: 1.5181 
2025-08-12 00:32:37,216 - INFO - Epoch: 124/500, Iter: 8/119 -- train_loss: 1.3223 
2025-08-12 00:32:39,431 - INFO - Epoch: 124/500, Iter: 9/119 -- train_loss: 1.5390 
2025-08-12 00:32:39,683 - INFO - Epoch: 124/500, Iter: 10/119 -- train_loss: 1.3721 
2025-08-12 00:32:39,935 - INFO - Epoch: 124/500, Iter: 11/119 -- train_loss: 1.3791 
2025-08-12 00:32:40,146 - INFO - Epoch: 124/500, Iter: 12/119 -- train_loss: 1.2780 
2025-08-12 00:32:41,598 - INFO - Epoch: 124/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:35:25,922 - INFO - Epoch: 125/500, Iter: 1/119 -- train_loss: 1.2579 


[1/119]   1%|           [00:00<?]

2025-08-12 00:35:26,637 - INFO - Epoch: 125/500, Iter: 2/119 -- train_loss: 1.7079 
2025-08-12 00:35:26,844 - INFO - Epoch: 125/500, Iter: 3/119 -- train_loss: 1.3839 
2025-08-12 00:35:27,055 - INFO - Epoch: 125/500, Iter: 4/119 -- train_loss: 1.3820 
2025-08-12 00:35:27,293 - INFO - Epoch: 125/500, Iter: 5/119 -- train_loss: 1.4533 
2025-08-12 00:35:27,510 - INFO - Epoch: 125/500, Iter: 6/119 -- train_loss: 1.4819 
2025-08-12 00:35:27,734 - INFO - Epoch: 125/500, Iter: 7/119 -- train_loss: 1.3781 
2025-08-12 00:35:27,935 - INFO - Epoch: 125/500, Iter: 8/119 -- train_loss: 1.3642 
2025-08-12 00:35:30,395 - INFO - Epoch: 125/500, Iter: 9/119 -- train_loss: 1.4137 
2025-08-12 00:35:33,067 - INFO - Epoch: 125/500, Iter: 10/119 -- train_loss: 1.5186 
2025-08-12 00:35:33,312 - INFO - Epoch: 125/500, Iter: 11/119 -- train_loss: 1.2417 
2025-08-12 00:35:33,509 - INFO - Epoch: 125/500, Iter: 12/119 -- train_loss: 1.2964 
2025-08-12 00:35:33,702 - INFO - Epoch: 125/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:38:20,498 - INFO - Epoch: 126/500, Iter: 1/119 -- train_loss: 1.3101 


[1/119]   1%|           [00:00<?]

2025-08-12 00:38:20,900 - INFO - Epoch: 126/500, Iter: 2/119 -- train_loss: 1.3242 
2025-08-12 00:38:24,869 - INFO - Epoch: 126/500, Iter: 3/119 -- train_loss: 1.2592 
2025-08-12 00:38:25,244 - INFO - Epoch: 126/500, Iter: 4/119 -- train_loss: 1.3732 
2025-08-12 00:38:25,665 - INFO - Epoch: 126/500, Iter: 5/119 -- train_loss: 1.4379 
2025-08-12 00:38:26,023 - INFO - Epoch: 126/500, Iter: 6/119 -- train_loss: 1.5177 
2025-08-12 00:38:26,412 - INFO - Epoch: 126/500, Iter: 7/119 -- train_loss: 1.3341 
2025-08-12 00:38:26,771 - INFO - Epoch: 126/500, Iter: 8/119 -- train_loss: 1.4556 
2025-08-12 00:38:32,432 - INFO - Epoch: 126/500, Iter: 9/119 -- train_loss: 1.3514 
2025-08-12 00:38:32,707 - INFO - Epoch: 126/500, Iter: 10/119 -- train_loss: 1.3556 
2025-08-12 00:38:32,980 - INFO - Epoch: 126/500, Iter: 11/119 -- train_loss: 1.3661 
2025-08-12 00:38:33,250 - INFO - Epoch: 126/500, Iter: 12/119 -- train_loss: 1.3021 
2025-08-12 00:38:33,512 - INFO - Epoch: 126/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:41:48,127 - INFO - Epoch: 127/500, Iter: 1/119 -- train_loss: 1.4177 


[1/119]   1%|           [00:00<?]

2025-08-12 00:41:48,502 - INFO - Epoch: 127/500, Iter: 2/119 -- train_loss: 1.3067 
2025-08-12 00:41:52,363 - INFO - Epoch: 127/500, Iter: 3/119 -- train_loss: 1.3987 
2025-08-12 00:41:52,634 - INFO - Epoch: 127/500, Iter: 4/119 -- train_loss: 1.3460 
2025-08-12 00:41:52,902 - INFO - Epoch: 127/500, Iter: 5/119 -- train_loss: 1.3659 
2025-08-12 00:41:53,158 - INFO - Epoch: 127/500, Iter: 6/119 -- train_loss: 1.3338 
2025-08-12 00:41:53,427 - INFO - Epoch: 127/500, Iter: 7/119 -- train_loss: 1.3835 
2025-08-12 00:41:53,698 - INFO - Epoch: 127/500, Iter: 8/119 -- train_loss: 1.3351 
2025-08-12 00:41:53,990 - INFO - Epoch: 127/500, Iter: 9/119 -- train_loss: 1.3879 
2025-08-12 00:41:54,271 - INFO - Epoch: 127/500, Iter: 10/119 -- train_loss: 1.4982 
2025-08-12 00:41:56,198 - INFO - Epoch: 127/500, Iter: 11/119 -- train_loss: 1.3573 
2025-08-12 00:41:56,799 - INFO - Epoch: 127/500, Iter: 12/119 -- train_loss: 1.3192 
2025-08-12 00:41:57,048 - INFO - Epoch: 127/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:45:07,538 - INFO - Epoch: 128/500, Iter: 1/119 -- train_loss: 1.3317 


[1/119]   1%|           [00:00<?]

2025-08-12 00:45:07,817 - INFO - Epoch: 128/500, Iter: 2/119 -- train_loss: 1.2550 
2025-08-12 00:45:08,085 - INFO - Epoch: 128/500, Iter: 3/119 -- train_loss: 1.3457 
2025-08-12 00:45:09,667 - INFO - Epoch: 128/500, Iter: 4/119 -- train_loss: 1.3455 
2025-08-12 00:45:09,951 - INFO - Epoch: 128/500, Iter: 5/119 -- train_loss: 1.3836 
2025-08-12 00:45:10,244 - INFO - Epoch: 128/500, Iter: 6/119 -- train_loss: 1.3685 
2025-08-12 00:45:10,491 - INFO - Epoch: 128/500, Iter: 7/119 -- train_loss: 1.3037 
2025-08-12 00:45:10,779 - INFO - Epoch: 128/500, Iter: 8/119 -- train_loss: 1.3330 
2025-08-12 00:45:11,107 - INFO - Epoch: 128/500, Iter: 9/119 -- train_loss: 1.4186 
2025-08-12 00:45:11,394 - INFO - Epoch: 128/500, Iter: 10/119 -- train_loss: 1.3782 
2025-08-12 00:45:11,669 - INFO - Epoch: 128/500, Iter: 11/119 -- train_loss: 1.3432 
2025-08-12 00:45:15,655 - INFO - Epoch: 128/500, Iter: 12/119 -- train_loss: 1.3001 
2025-08-12 00:45:15,958 - INFO - Epoch: 128/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:48:43,440 - INFO - Epoch: 129/500, Iter: 1/119 -- train_loss: 1.3559 


[1/119]   1%|           [00:00<?]

2025-08-12 00:48:43,723 - INFO - Epoch: 129/500, Iter: 2/119 -- train_loss: 1.3948 
2025-08-12 00:48:43,984 - INFO - Epoch: 129/500, Iter: 3/119 -- train_loss: 1.3786 
2025-08-12 00:48:44,249 - INFO - Epoch: 129/500, Iter: 4/119 -- train_loss: 1.4601 
2025-08-12 00:48:44,510 - INFO - Epoch: 129/500, Iter: 5/119 -- train_loss: 1.3935 
2025-08-12 00:48:44,784 - INFO - Epoch: 129/500, Iter: 6/119 -- train_loss: 1.3031 
2025-08-12 00:48:45,088 - INFO - Epoch: 129/500, Iter: 7/119 -- train_loss: 1.2336 
2025-08-12 00:48:45,375 - INFO - Epoch: 129/500, Iter: 8/119 -- train_loss: 1.3639 
2025-08-12 00:48:47,070 - INFO - Epoch: 129/500, Iter: 9/119 -- train_loss: 1.3919 
2025-08-12 00:48:47,353 - INFO - Epoch: 129/500, Iter: 10/119 -- train_loss: 1.2448 
2025-08-12 00:48:47,616 - INFO - Epoch: 129/500, Iter: 11/119 -- train_loss: 1.3591 
2025-08-12 00:48:47,920 - INFO - Epoch: 129/500, Iter: 12/119 -- train_loss: 1.3283 
2025-08-12 00:48:48,203 - INFO - Epoch: 129/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:51:40,211 - INFO - Epoch: 130/500, Iter: 1/119 -- train_loss: 1.4690 


[1/119]   1%|           [00:00<?]

2025-08-12 00:51:40,419 - INFO - Epoch: 130/500, Iter: 2/119 -- train_loss: 1.3604 
2025-08-12 00:51:40,654 - INFO - Epoch: 130/500, Iter: 3/119 -- train_loss: 1.3337 
2025-08-12 00:51:40,884 - INFO - Epoch: 130/500, Iter: 4/119 -- train_loss: 1.2825 
2025-08-12 00:51:41,094 - INFO - Epoch: 130/500, Iter: 5/119 -- train_loss: 1.2515 
2025-08-12 00:51:41,313 - INFO - Epoch: 130/500, Iter: 6/119 -- train_loss: 1.4443 
2025-08-12 00:51:41,535 - INFO - Epoch: 130/500, Iter: 7/119 -- train_loss: 1.2723 
2025-08-12 00:51:41,774 - INFO - Epoch: 130/500, Iter: 8/119 -- train_loss: 1.3110 
2025-08-12 00:51:42,027 - INFO - Epoch: 130/500, Iter: 9/119 -- train_loss: 1.3879 
2025-08-12 00:51:42,264 - INFO - Epoch: 130/500, Iter: 10/119 -- train_loss: 1.3201 
2025-08-12 00:51:42,837 - INFO - Epoch: 130/500, Iter: 11/119 -- train_loss: 1.3844 
2025-08-12 00:51:43,086 - INFO - Epoch: 130/500, Iter: 12/119 -- train_loss: 1.4102 
2025-08-12 00:51:43,301 - INFO - Epoch: 130/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:54:25,510 - INFO - Epoch: 131/500, Iter: 1/119 -- train_loss: 1.4463 


[1/119]   1%|           [00:00<?]

2025-08-12 00:54:25,730 - INFO - Epoch: 131/500, Iter: 2/119 -- train_loss: 1.3264 
2025-08-12 00:54:26,011 - INFO - Epoch: 131/500, Iter: 3/119 -- train_loss: 1.3872 
2025-08-12 00:54:26,749 - INFO - Epoch: 131/500, Iter: 4/119 -- train_loss: 1.2787 
2025-08-12 00:54:26,983 - INFO - Epoch: 131/500, Iter: 5/119 -- train_loss: 1.2852 
2025-08-12 00:54:27,195 - INFO - Epoch: 131/500, Iter: 6/119 -- train_loss: 1.3265 
2025-08-12 00:54:27,450 - INFO - Epoch: 131/500, Iter: 7/119 -- train_loss: 1.2052 
2025-08-12 00:54:27,711 - INFO - Epoch: 131/500, Iter: 8/119 -- train_loss: 1.3546 
2025-08-12 00:54:28,130 - INFO - Epoch: 131/500, Iter: 9/119 -- train_loss: 1.3193 
2025-08-12 00:54:28,381 - INFO - Epoch: 131/500, Iter: 10/119 -- train_loss: 1.3128 
2025-08-12 00:54:29,501 - INFO - Epoch: 131/500, Iter: 11/119 -- train_loss: 1.3261 
2025-08-12 00:54:30,638 - INFO - Epoch: 131/500, Iter: 12/119 -- train_loss: 1.3432 
2025-08-12 00:54:30,858 - INFO - Epoch: 131/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 00:57:20,935 - INFO - Epoch: 132/500, Iter: 1/119 -- train_loss: 1.3093 


[1/119]   1%|           [00:00<?]

2025-08-12 00:57:22,395 - INFO - Epoch: 132/500, Iter: 2/119 -- train_loss: 1.3787 
2025-08-12 00:57:22,639 - INFO - Epoch: 132/500, Iter: 3/119 -- train_loss: 1.2708 
2025-08-12 00:57:22,893 - INFO - Epoch: 132/500, Iter: 4/119 -- train_loss: 1.3691 
2025-08-12 00:57:23,116 - INFO - Epoch: 132/500, Iter: 5/119 -- train_loss: 1.3759 
2025-08-12 00:57:23,331 - INFO - Epoch: 132/500, Iter: 6/119 -- train_loss: 1.4906 
2025-08-12 00:57:23,580 - INFO - Epoch: 132/500, Iter: 7/119 -- train_loss: 1.3999 
2025-08-12 00:57:23,831 - INFO - Epoch: 132/500, Iter: 8/119 -- train_loss: 1.3105 
2025-08-12 00:57:24,086 - INFO - Epoch: 132/500, Iter: 9/119 -- train_loss: 1.3668 
2025-08-12 00:57:27,349 - INFO - Epoch: 132/500, Iter: 10/119 -- train_loss: 1.3066 
2025-08-12 00:57:27,556 - INFO - Epoch: 132/500, Iter: 11/119 -- train_loss: 1.2787 
2025-08-12 00:57:27,826 - INFO - Epoch: 132/500, Iter: 12/119 -- train_loss: 1.3319 
2025-08-12 00:57:28,034 - INFO - Epoch: 132/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 01:00:14,214 - INFO - Epoch: 133/500, Iter: 1/119 -- train_loss: 1.3490 


[1/119]   1%|           [00:00<?]

2025-08-12 01:00:16,772 - INFO - Epoch: 133/500, Iter: 2/119 -- train_loss: 1.3578 
2025-08-12 01:00:17,278 - INFO - Epoch: 133/500, Iter: 3/119 -- train_loss: 1.3567 
2025-08-12 01:00:17,538 - INFO - Epoch: 133/500, Iter: 4/119 -- train_loss: 1.3675 
2025-08-12 01:00:17,808 - INFO - Epoch: 133/500, Iter: 5/119 -- train_loss: 1.4533 
2025-08-12 01:00:18,063 - INFO - Epoch: 133/500, Iter: 6/119 -- train_loss: 1.3195 
2025-08-12 01:00:18,312 - INFO - Epoch: 133/500, Iter: 7/119 -- train_loss: 1.4405 
2025-08-12 01:00:18,558 - INFO - Epoch: 133/500, Iter: 8/119 -- train_loss: 1.3629 
2025-08-12 01:00:18,806 - INFO - Epoch: 133/500, Iter: 9/119 -- train_loss: 1.4214 
2025-08-12 01:00:20,392 - INFO - Epoch: 133/500, Iter: 10/119 -- train_loss: 1.4157 
2025-08-12 01:00:22,557 - INFO - Epoch: 133/500, Iter: 11/119 -- train_loss: 1.4201 
2025-08-12 01:00:22,775 - INFO - Epoch: 133/500, Iter: 12/119 -- train_loss: 1.2825 
2025-08-12 01:00:22,994 - INFO - Epoch: 133/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 01:03:05,014 - INFO - Epoch: 134/500, Iter: 1/119 -- train_loss: 1.3011 


[1/119]   1%|           [00:00<?]

2025-08-12 01:03:06,001 - INFO - Epoch: 134/500, Iter: 2/119 -- train_loss: 1.3125 
2025-08-12 01:03:06,225 - INFO - Epoch: 134/500, Iter: 3/119 -- train_loss: 1.2599 
2025-08-12 01:03:06,454 - INFO - Epoch: 134/500, Iter: 4/119 -- train_loss: 1.2682 
2025-08-12 01:03:06,691 - INFO - Epoch: 134/500, Iter: 5/119 -- train_loss: 1.3477 
2025-08-12 01:03:06,944 - INFO - Epoch: 134/500, Iter: 6/119 -- train_loss: 1.3105 
2025-08-12 01:03:07,226 - INFO - Epoch: 134/500, Iter: 7/119 -- train_loss: 1.4744 
2025-08-12 01:03:07,466 - INFO - Epoch: 134/500, Iter: 8/119 -- train_loss: 1.2847 
2025-08-12 01:03:07,722 - INFO - Epoch: 134/500, Iter: 9/119 -- train_loss: 1.3494 
2025-08-12 01:03:08,613 - INFO - Epoch: 134/500, Iter: 10/119 -- train_loss: 1.3091 
2025-08-12 01:03:09,348 - INFO - Epoch: 134/500, Iter: 11/119 -- train_loss: 1.3675 
2025-08-12 01:03:12,093 - INFO - Epoch: 134/500, Iter: 12/119 -- train_loss: 1.4576 
2025-08-12 01:03:12,324 - INFO - Epoch: 134/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 01:05:57,559 - INFO - Epoch: 135/500, Iter: 1/119 -- train_loss: 1.4223 


[1/119]   1%|           [00:00<?]

2025-08-12 01:05:57,788 - INFO - Epoch: 135/500, Iter: 2/119 -- train_loss: 1.3238 
2025-08-12 01:05:58,041 - INFO - Epoch: 135/500, Iter: 3/119 -- train_loss: 1.4051 
2025-08-12 01:05:58,294 - INFO - Epoch: 135/500, Iter: 4/119 -- train_loss: 1.3546 
2025-08-12 01:05:58,541 - INFO - Epoch: 135/500, Iter: 5/119 -- train_loss: 1.4232 
2025-08-12 01:05:58,769 - INFO - Epoch: 135/500, Iter: 6/119 -- train_loss: 1.4012 
2025-08-12 01:05:58,993 - INFO - Epoch: 135/500, Iter: 7/119 -- train_loss: 1.4280 
2025-08-12 01:05:59,261 - INFO - Epoch: 135/500, Iter: 8/119 -- train_loss: 1.4078 
2025-08-12 01:06:00,718 - INFO - Epoch: 135/500, Iter: 9/119 -- train_loss: 1.4088 
2025-08-12 01:06:00,960 - INFO - Epoch: 135/500, Iter: 10/119 -- train_loss: 1.2734 
2025-08-12 01:06:01,173 - INFO - Epoch: 135/500, Iter: 11/119 -- train_loss: 1.3547 
2025-08-12 01:06:01,406 - INFO - Epoch: 135/500, Iter: 12/119 -- train_loss: 1.3217 
2025-08-12 01:06:01,635 - INFO - Epoch: 135/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 01:08:50,887 - INFO - Epoch: 136/500, Iter: 1/119 -- train_loss: 1.3647 


[1/119]   1%|           [00:00<?]

2025-08-12 01:08:51,143 - INFO - Epoch: 136/500, Iter: 2/119 -- train_loss: 1.2820 
2025-08-12 01:08:51,402 - INFO - Epoch: 136/500, Iter: 3/119 -- train_loss: 1.4289 
2025-08-12 01:08:51,648 - INFO - Epoch: 136/500, Iter: 4/119 -- train_loss: 1.4116 
2025-08-12 01:08:51,869 - INFO - Epoch: 136/500, Iter: 5/119 -- train_loss: 1.3750 
2025-08-12 01:08:52,102 - INFO - Epoch: 136/500, Iter: 6/119 -- train_loss: 1.3064 
2025-08-12 01:08:52,335 - INFO - Epoch: 136/500, Iter: 7/119 -- train_loss: 1.2731 
2025-08-12 01:08:52,574 - INFO - Epoch: 136/500, Iter: 8/119 -- train_loss: 1.3397 
2025-08-12 01:08:55,384 - INFO - Epoch: 136/500, Iter: 9/119 -- train_loss: 1.3379 
2025-08-12 01:08:55,652 - INFO - Epoch: 136/500, Iter: 10/119 -- train_loss: 1.2501 
2025-08-12 01:08:55,861 - INFO - Epoch: 136/500, Iter: 11/119 -- train_loss: 1.2779 
2025-08-12 01:08:56,638 - INFO - Epoch: 136/500, Iter: 12/119 -- train_loss: 1.2440 
2025-08-12 01:08:56,839 - INFO - Epoch: 136/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-12 01:11:39,642 - INFO - Epoch: 137/500, Iter: 1/119 -- train_loss: 1.3957 


[1/119]   1%|           [00:00<?]

2025-08-12 01:11:39,867 - INFO - Epoch: 137/500, Iter: 2/119 -- train_loss: 1.3537 
2025-08-12 01:11:40,482 - INFO - Epoch: 137/500, Iter: 3/119 -- train_loss: 1.2463 
2025-08-12 01:11:40,707 - INFO - Epoch: 137/500, Iter: 4/119 -- train_loss: 1.3158 
2025-08-12 01:11:40,937 - INFO - Epoch: 137/500, Iter: 5/119 -- train_loss: 1.2725 
2025-08-12 01:11:41,194 - INFO - Epoch: 137/500, Iter: 6/119 -- train_loss: 1.3135 
2025-08-12 01:11:41,411 - INFO - Epoch: 137/500, Iter: 7/119 -- train_loss: 1.2416 
2025-08-12 01:11:41,643 - INFO - Epoch: 137/500, Iter: 8/119 -- train_loss: 1.4404 
2025-08-12 01:11:42,252 - INFO - Epoch: 137/500, Iter: 9/119 -- train_loss: 1.3258 
2025-08-12 01:11:42,490 - INFO - Epoch: 137/500, Iter: 10/119 -- train_loss: 1.2840 
2025-08-12 01:11:43,495 - INFO - Epoch: 137/500, Iter: 11/119 -- train_loss: 1.2866 
2025-08-12 01:11:44,231 - INFO - Epoch: 137/500, Iter: 12/119 -- train_loss: 1.4798 
2025-08-12 01:11:44,487 - INFO - Epoch: 137/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

Finish the training with a final evaluation of the best model. To allow visualization of all outputs, add an OutputStore handler first. Otherwise, only the output from the last epoch will be accessible. 

In [None]:
# Keep the evaluator's outputs for the whole epoch and expose them under the
# name "output" so they can be inspected after evaluation.
eos_handler = ignite.handlers.EpochOutputStore()
eos_handler.attach(engine=trainer.evaluator, name="output")

In [None]:
# Placeholder path: replace <model-name> with the checkpoint file to evaluate.
eval_checkpoint = "models/<model-name>.pt"
trainer.evaluate(checkpoint=eval_checkpoint)

Generate a markdown document with segmentation results

In [None]:
# Build the results report for this run.
# The configuration object in this notebook is bound to `cfg` (loaded with
# load_config at the top); the name `config` is never defined here, so using
# it would raise a NameError on a fresh kernel.
report_generator = ReportGenerator(cfg.run_id, cfg.out_dir, cfg.log_dir)
report_generator.generate_report()

Have a look at some outputs

In [None]:
# Evaluation results kept on the evaluator state under the name "output".
output = trainer.evaluator.state.output
keys = ["image", "label", "pred"]

# For every stored item take entry 0 (presumably the first sample of the
# batch - TODO confirm), detach it from the graph, move it to the CPU and
# drop singleton dimensions; group the resulting tensors by key.
outputs = {
    key: [batch[0][key].detach().cpu().squeeze() for batch in output]
    for key in keys
}

In [None]:
# Show the first three cases: images first, then labels, then predictions.
# Every tensor gets dims 0 and 2 swapped and dim -2 flipped for display.
# Labels and predictions are first reduced with argmax over dim 0
# (presumably the class channel - TODO confirm) and cast to float.
ListViewer(
    [
        *(image.transpose(0, 2).flip(-2) for image in outputs["image"][:3]),
        *(
            label.argmax(0).transpose(0, 2).flip(-2).float()
            for label in outputs["label"][:3]
        ),
        *(
            pred.argmax(0).transpose(0, 2).flip(-2).float()
            for pred in outputs["pred"][:3]
        ),
    ]
).show()