# Segmentation Example
> Train a U-Net for pixelwise segmentation of the prostate

In [1]:
import monai
import ignite

from prostate158.utils import load_config
from prostate158.train import SegmentationTrainer
from prostate158.report import ReportGenerator
from prostate158.viewer import ListViewer

### Installing dependencies

In [2]:
# Dependency installation (uncomment what you need).
# `%pip` is used instead of `!pip` so packages are installed into the
# environment of the running kernel.

# # If you have a requirements.txt:
# %pip install --upgrade pip
# %pip install -r requirements.txt

# # Otherwise install the core libraries directly.
# # Note: PyTorch-Ignite is published on PyPI as `pytorch-ignite`, not `ignite`.
# %pip install "monai[all]" pytorch-ignite matplotlib pyyaml munch

# # Finally, PyTorch from the cu118 wheel index, plus OpenCV and ipywidgets:
# %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# %pip install opencv-python
# %pip install ipywidgets

In [3]:
import torch

# Ask PyTorch whether a CUDA device is usable and show the answer (True/False).
cuda_available = torch.cuda.is_available()
print(cuda_available)

True


All parameters needed for training and evaluation are set in a YAML config file. This notebook loads `tumor.yaml`; use `anatomy.yaml` instead for anatomy segmentation.

In [4]:
# Config file to load. "tumor.yaml" is used here; point this at "anatomy.yaml"
# for anatomy segmentation.
CONFIG_FILE = "tumor.yaml"

# Architecture written into cfg.model.type. "unet" is the other value this
# notebook has been switched to.
MODEL_TYPE = "rrunet3d"

cfg = load_config(CONFIG_FILE)

# Seed MONAI's determinism helper with the seed stored in the config.
monai.utils.set_determinism(seed=cfg.seed)

cfg.model.type = MODEL_TYPE

In [5]:
from prostate158.model import get_model

# Build the network described by cfg, place it on cfg.device, and print its
# layer structure for inspection.
# NOTE(review): the trainer cell further down logs its own `[get_model]` line,
# which suggests SegmentationTrainer builds a separate model. If so, this
# instance is for inspection only and still occupies device memory during
# training. Confirm before relying on `model` elsewhere.
model = get_model(cfg)
model = model.to(cfg.device)
print(model)

[get_model] model.type = 'rrunet3d'
RRUNet3D(
  (pool): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (unpool): Upsample(scale_factor=2.0, mode='nearest')
  (input_conv): Conv3d(3, 16, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
  (output_conv): Conv3d(16, 2, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
  (output_activation): Softmax(dim=1)
  (encoders): ModuleList(
    (0): ResidualBlock(
      (CONV): ConvBlock(
        (ops): ModuleList(
          (0): Conv3d(16, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
          (1): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
      )
      (RCNN): Sequential(
        (0): RecurrentBlock(
          (conv): Sequential(
            (0): Conv3d(16, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
            (1): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_s

Create a supervised trainer for the segmentation task.

In [6]:
# Metric names handed to the trainer.
metric_names = ["MeanDice", "HausdorffDistance", "SurfaceDistance"]

# Trainer configured from cfg, with the progress bar, early stopping and
# saving of the latest metrics switched on.
trainer = SegmentationTrainer(
    config=cfg,
    metrics=metric_names,
    progress_bar=True,
    early_stopping=True,
    save_latest_metrics=True,
)

# Optional: load pre-trained weights from models/tumor.pt.
# trainer.load_checkpoint("models/tumor.pt")

[get_model] model.type = 'rrunet3d'


Add a learning rate scheduler that follows the one-cycle policy.

In [7]:
# Attach the one-cycle learning-rate schedule to the trainer before training starts.
trainer.fit_one_cycle()

Let's train. This can take several hours. 

In [8]:
# Run the training loop. The saved output shows train_loss logged for every
# iteration (119 iterations per epoch, epoch counter out of 500), so expect a
# very long output stream from this cell.
trainer.run()

`torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
`torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.


2025-08-10 16:23:14,290 - INFO - Epoch: 1/500, Iter: 1/119 -- train_loss: 1.3051 


[1/119]   1%|           [00:00<?]

2025-08-10 16:23:21,176 - INFO - Epoch: 1/500, Iter: 2/119 -- train_loss: 1.2622 
2025-08-10 16:23:21,586 - INFO - Epoch: 1/500, Iter: 3/119 -- train_loss: 1.3226 
2025-08-10 16:23:22,001 - INFO - Epoch: 1/500, Iter: 4/119 -- train_loss: 1.3328 
2025-08-10 16:23:22,356 - INFO - Epoch: 1/500, Iter: 5/119 -- train_loss: 1.3364 
2025-08-10 16:23:22,753 - INFO - Epoch: 1/500, Iter: 6/119 -- train_loss: 1.2986 
2025-08-10 16:23:23,159 - INFO - Epoch: 1/500, Iter: 7/119 -- train_loss: 1.3207 
2025-08-10 16:23:23,487 - INFO - Epoch: 1/500, Iter: 8/119 -- train_loss: 1.3197 
2025-08-10 16:23:23,839 - INFO - Epoch: 1/500, Iter: 9/119 -- train_loss: 1.3195 
2025-08-10 16:23:27,484 - INFO - Epoch: 1/500, Iter: 10/119 -- train_loss: 1.2773 
2025-08-10 16:23:27,838 - INFO - Epoch: 1/500, Iter: 11/119 -- train_loss: 1.3014 
2025-08-10 16:23:28,162 - INFO - Epoch: 1/500, Iter: 12/119 -- train_loss: 1.3029 
2025-08-10 16:23:28,491 - INFO - Epoch: 1/500, Iter: 13/119 -- train_loss: 1.3017 
2025-08-10 1

`torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.


[1/20]   5%|5          [00:00<?]

the ground truth of class 0 is all 0, this may result in nan/inf distance.
Mean of empty slice


2025-08-10 16:27:01,515 - INFO - Epoch: 2/500, Iter: 1/119 -- train_loss: 1.2455 


[1/119]   1%|           [00:00<?]

2025-08-10 16:27:07,508 - INFO - Epoch: 2/500, Iter: 2/119 -- train_loss: 1.2581 
2025-08-10 16:27:07,779 - INFO - Epoch: 2/500, Iter: 3/119 -- train_loss: 1.2397 
2025-08-10 16:27:08,080 - INFO - Epoch: 2/500, Iter: 4/119 -- train_loss: 1.2405 
2025-08-10 16:27:08,347 - INFO - Epoch: 2/500, Iter: 5/119 -- train_loss: 1.2670 
2025-08-10 16:27:08,625 - INFO - Epoch: 2/500, Iter: 6/119 -- train_loss: 1.2436 
2025-08-10 16:27:08,911 - INFO - Epoch: 2/500, Iter: 7/119 -- train_loss: 1.2602 
2025-08-10 16:27:09,210 - INFO - Epoch: 2/500, Iter: 8/119 -- train_loss: 1.2281 
2025-08-10 16:27:09,526 - INFO - Epoch: 2/500, Iter: 9/119 -- train_loss: 1.2413 
2025-08-10 16:27:14,895 - INFO - Epoch: 2/500, Iter: 10/119 -- train_loss: 1.2550 
2025-08-10 16:27:15,207 - INFO - Epoch: 2/500, Iter: 11/119 -- train_loss: 1.2277 
2025-08-10 16:27:15,496 - INFO - Epoch: 2/500, Iter: 12/119 -- train_loss: 1.2473 
2025-08-10 16:27:15,778 - INFO - Epoch: 2/500, Iter: 13/119 -- train_loss: 1.2549 
2025-08-10 1

[1/20]   5%|5          [00:00<?]

2025-08-10 16:30:58,403 - INFO - Epoch: 3/500, Iter: 1/119 -- train_loss: 1.2218 


[1/119]   1%|           [00:00<?]

2025-08-10 16:30:58,718 - INFO - Epoch: 3/500, Iter: 2/119 -- train_loss: 1.2203 
2025-08-10 16:30:59,840 - INFO - Epoch: 3/500, Iter: 3/119 -- train_loss: 1.2195 
2025-08-10 16:31:00,134 - INFO - Epoch: 3/500, Iter: 4/119 -- train_loss: 1.2207 
2025-08-10 16:31:00,452 - INFO - Epoch: 3/500, Iter: 5/119 -- train_loss: 1.2204 
2025-08-10 16:31:00,745 - INFO - Epoch: 3/500, Iter: 6/119 -- train_loss: 1.2142 
2025-08-10 16:31:01,033 - INFO - Epoch: 3/500, Iter: 7/119 -- train_loss: 1.1853 
2025-08-10 16:31:01,365 - INFO - Epoch: 3/500, Iter: 8/119 -- train_loss: 1.2093 
2025-08-10 16:31:04,305 - INFO - Epoch: 3/500, Iter: 9/119 -- train_loss: 1.2168 
2025-08-10 16:31:04,632 - INFO - Epoch: 3/500, Iter: 10/119 -- train_loss: 1.2180 
2025-08-10 16:31:07,992 - INFO - Epoch: 3/500, Iter: 11/119 -- train_loss: 1.1948 
2025-08-10 16:31:08,271 - INFO - Epoch: 3/500, Iter: 12/119 -- train_loss: 1.2126 
2025-08-10 16:31:08,575 - INFO - Epoch: 3/500, Iter: 13/119 -- train_loss: 1.2002 
2025-08-10 1

[1/20]   5%|5          [00:00<?]

the prediction of class 0 is all 0, this may result in nan/inf distance.


2025-08-10 16:35:06,754 - INFO - Epoch: 4/500, Iter: 1/119 -- train_loss: 1.1813 


[1/119]   1%|           [00:00<?]

2025-08-10 16:35:07,057 - INFO - Epoch: 4/500, Iter: 2/119 -- train_loss: 1.1917 
2025-08-10 16:35:10,721 - INFO - Epoch: 4/500, Iter: 3/119 -- train_loss: 1.2037 
2025-08-10 16:35:11,019 - INFO - Epoch: 4/500, Iter: 4/119 -- train_loss: 1.1946 
2025-08-10 16:35:11,306 - INFO - Epoch: 4/500, Iter: 5/119 -- train_loss: 1.2066 
2025-08-10 16:35:11,605 - INFO - Epoch: 4/500, Iter: 6/119 -- train_loss: 1.1864 
2025-08-10 16:35:11,916 - INFO - Epoch: 4/500, Iter: 7/119 -- train_loss: 1.1784 
2025-08-10 16:35:12,223 - INFO - Epoch: 4/500, Iter: 8/119 -- train_loss: 1.2097 
2025-08-10 16:35:18,757 - INFO - Epoch: 4/500, Iter: 9/119 -- train_loss: 1.2050 
2025-08-10 16:35:19,035 - INFO - Epoch: 4/500, Iter: 10/119 -- train_loss: 1.2108 
2025-08-10 16:35:20,514 - INFO - Epoch: 4/500, Iter: 11/119 -- train_loss: 1.1958 
2025-08-10 16:35:23,503 - INFO - Epoch: 4/500, Iter: 12/119 -- train_loss: 1.2066 
2025-08-10 16:35:23,778 - INFO - Epoch: 4/500, Iter: 13/119 -- train_loss: 1.2010 
2025-08-10 1

[1/20]   5%|5          [00:00<?]

2025-08-10 16:39:12,849 - INFO - Epoch: 5/500, Iter: 1/119 -- train_loss: 1.2018 


[1/119]   1%|           [00:00<?]

2025-08-10 16:39:13,131 - INFO - Epoch: 5/500, Iter: 2/119 -- train_loss: 1.1994 
2025-08-10 16:39:13,431 - INFO - Epoch: 5/500, Iter: 3/119 -- train_loss: 1.2019 
2025-08-10 16:39:13,759 - INFO - Epoch: 5/500, Iter: 4/119 -- train_loss: 1.1990 
2025-08-10 16:39:20,537 - INFO - Epoch: 5/500, Iter: 5/119 -- train_loss: 1.1981 
2025-08-10 16:39:20,866 - INFO - Epoch: 5/500, Iter: 6/119 -- train_loss: 1.1481 
2025-08-10 16:39:21,177 - INFO - Epoch: 5/500, Iter: 7/119 -- train_loss: 1.2043 
2025-08-10 16:39:21,476 - INFO - Epoch: 5/500, Iter: 8/119 -- train_loss: 1.1602 
2025-08-10 16:39:21,803 - INFO - Epoch: 5/500, Iter: 9/119 -- train_loss: 1.1630 
2025-08-10 16:39:22,133 - INFO - Epoch: 5/500, Iter: 10/119 -- train_loss: 1.2003 
2025-08-10 16:39:22,459 - INFO - Epoch: 5/500, Iter: 11/119 -- train_loss: 1.1747 
2025-08-10 16:39:22,767 - INFO - Epoch: 5/500, Iter: 12/119 -- train_loss: 1.1988 
2025-08-10 16:39:26,823 - INFO - Epoch: 5/500, Iter: 13/119 -- train_loss: 1.1992 
2025-08-10 1

[1/20]   5%|5          [00:00<?]

2025-08-10 16:43:02,829 - INFO - Epoch: 6/500, Iter: 1/119 -- train_loss: 1.1869 


[1/119]   1%|           [00:00<?]

2025-08-10 16:43:06,106 - INFO - Epoch: 6/500, Iter: 2/119 -- train_loss: 1.0456 
2025-08-10 16:43:06,397 - INFO - Epoch: 6/500, Iter: 3/119 -- train_loss: 1.1738 
2025-08-10 16:43:06,686 - INFO - Epoch: 6/500, Iter: 4/119 -- train_loss: 1.1958 
2025-08-10 16:43:09,006 - INFO - Epoch: 6/500, Iter: 5/119 -- train_loss: 1.1734 
2025-08-10 16:43:09,327 - INFO - Epoch: 6/500, Iter: 6/119 -- train_loss: 1.1853 
2025-08-10 16:43:09,609 - INFO - Epoch: 6/500, Iter: 7/119 -- train_loss: 1.1944 
2025-08-10 16:43:09,924 - INFO - Epoch: 6/500, Iter: 8/119 -- train_loss: 1.1952 
2025-08-10 16:43:10,361 - INFO - Epoch: 6/500, Iter: 9/119 -- train_loss: 1.1964 
2025-08-10 16:43:10,647 - INFO - Epoch: 6/500, Iter: 10/119 -- train_loss: 1.1957 
2025-08-10 16:43:10,953 - INFO - Epoch: 6/500, Iter: 11/119 -- train_loss: 1.1951 
2025-08-10 16:43:11,272 - INFO - Epoch: 6/500, Iter: 12/119 -- train_loss: 1.1997 
2025-08-10 16:43:20,634 - INFO - Epoch: 6/500, Iter: 13/119 -- train_loss: 1.1988 
2025-08-10 1

[1/20]   5%|5          [00:00<?]

2025-08-10 16:46:56,781 - INFO - Epoch: 7/500, Iter: 1/119 -- train_loss: 1.1931 


[1/119]   1%|           [00:00<?]

2025-08-10 16:47:01,887 - INFO - Epoch: 7/500, Iter: 2/119 -- train_loss: 1.1957 
2025-08-10 16:47:02,182 - INFO - Epoch: 7/500, Iter: 3/119 -- train_loss: 1.1959 
2025-08-10 16:47:02,522 - INFO - Epoch: 7/500, Iter: 4/119 -- train_loss: 1.1938 
2025-08-10 16:47:02,853 - INFO - Epoch: 7/500, Iter: 5/119 -- train_loss: 1.1892 
2025-08-10 16:47:03,162 - INFO - Epoch: 7/500, Iter: 6/119 -- train_loss: 1.1321 
2025-08-10 16:47:03,478 - INFO - Epoch: 7/500, Iter: 7/119 -- train_loss: 1.1696 
2025-08-10 16:47:03,786 - INFO - Epoch: 7/500, Iter: 8/119 -- train_loss: 1.1651 
2025-08-10 16:47:04,076 - INFO - Epoch: 7/500, Iter: 9/119 -- train_loss: 1.1551 
2025-08-10 16:47:06,852 - INFO - Epoch: 7/500, Iter: 10/119 -- train_loss: 1.1765 
2025-08-10 16:47:07,132 - INFO - Epoch: 7/500, Iter: 11/119 -- train_loss: 1.1936 
2025-08-10 16:47:08,139 - INFO - Epoch: 7/500, Iter: 12/119 -- train_loss: 1.1804 
2025-08-10 16:47:08,432 - INFO - Epoch: 7/500, Iter: 13/119 -- train_loss: 1.1952 
2025-08-10 1

[1/20]   5%|5          [00:00<?]

2025-08-10 16:50:49,885 - INFO - Epoch: 8/500, Iter: 1/119 -- train_loss: 1.1941 


[1/119]   1%|           [00:00<?]

2025-08-10 16:50:51,742 - INFO - Epoch: 8/500, Iter: 2/119 -- train_loss: 1.1924 
2025-08-10 16:50:52,800 - INFO - Epoch: 8/500, Iter: 3/119 -- train_loss: 1.1896 
2025-08-10 16:50:59,156 - INFO - Epoch: 8/500, Iter: 4/119 -- train_loss: 1.1732 
2025-08-10 16:50:59,414 - INFO - Epoch: 8/500, Iter: 5/119 -- train_loss: 1.1866 
2025-08-10 16:50:59,673 - INFO - Epoch: 8/500, Iter: 6/119 -- train_loss: 1.1562 
2025-08-10 16:50:59,958 - INFO - Epoch: 8/500, Iter: 7/119 -- train_loss: 1.1861 
2025-08-10 16:51:00,264 - INFO - Epoch: 8/500, Iter: 8/119 -- train_loss: 1.1918 
2025-08-10 16:51:00,538 - INFO - Epoch: 8/500, Iter: 9/119 -- train_loss: 1.1716 
2025-08-10 16:51:02,856 - INFO - Epoch: 8/500, Iter: 10/119 -- train_loss: 1.1470 
2025-08-10 16:51:03,155 - INFO - Epoch: 8/500, Iter: 11/119 -- train_loss: 1.1925 
2025-08-10 16:51:08,317 - INFO - Epoch: 8/500, Iter: 12/119 -- train_loss: 1.1470 
2025-08-10 16:51:08,567 - INFO - Epoch: 8/500, Iter: 13/119 -- train_loss: 1.1899 
2025-08-10 1

[1/20]   5%|5          [00:00<?]

2025-08-10 16:54:44,575 - INFO - Epoch: 9/500, Iter: 1/119 -- train_loss: 1.1881 


[1/119]   1%|           [00:00<?]

2025-08-10 16:54:44,845 - INFO - Epoch: 9/500, Iter: 2/119 -- train_loss: 1.1878 
2025-08-10 16:54:45,153 - INFO - Epoch: 9/500, Iter: 3/119 -- train_loss: 1.1934 
2025-08-10 16:54:45,469 - INFO - Epoch: 9/500, Iter: 4/119 -- train_loss: 1.1487 
2025-08-10 16:54:45,802 - INFO - Epoch: 9/500, Iter: 5/119 -- train_loss: 1.1873 
2025-08-10 16:54:46,901 - INFO - Epoch: 9/500, Iter: 6/119 -- train_loss: 1.1722 
2025-08-10 16:54:47,197 - INFO - Epoch: 9/500, Iter: 7/119 -- train_loss: 1.1909 
2025-08-10 16:54:47,472 - INFO - Epoch: 9/500, Iter: 8/119 -- train_loss: 1.1922 
2025-08-10 16:54:50,529 - INFO - Epoch: 9/500, Iter: 9/119 -- train_loss: 1.1905 
2025-08-10 16:54:52,021 - INFO - Epoch: 9/500, Iter: 10/119 -- train_loss: 1.1639 
2025-08-10 16:54:52,318 - INFO - Epoch: 9/500, Iter: 11/119 -- train_loss: 1.1806 
2025-08-10 16:54:52,612 - INFO - Epoch: 9/500, Iter: 12/119 -- train_loss: 1.0859 
2025-08-10 16:54:53,938 - INFO - Epoch: 9/500, Iter: 13/119 -- train_loss: 1.1849 
2025-08-10 1

[1/20]   5%|5          [00:00<?]

2025-08-10 16:58:23,950 - INFO - Epoch: 10/500, Iter: 1/119 -- train_loss: 1.1847 


[1/119]   1%|           [00:00<?]

2025-08-10 16:58:28,134 - INFO - Epoch: 10/500, Iter: 2/119 -- train_loss: 1.1902 
2025-08-10 16:58:28,435 - INFO - Epoch: 10/500, Iter: 3/119 -- train_loss: 1.1440 
2025-08-10 16:58:28,787 - INFO - Epoch: 10/500, Iter: 4/119 -- train_loss: 1.1802 
2025-08-10 16:58:29,132 - INFO - Epoch: 10/500, Iter: 5/119 -- train_loss: 1.1475 
2025-08-10 16:58:29,433 - INFO - Epoch: 10/500, Iter: 6/119 -- train_loss: 1.1456 
2025-08-10 16:58:29,755 - INFO - Epoch: 10/500, Iter: 7/119 -- train_loss: 1.1893 
2025-08-10 16:58:30,055 - INFO - Epoch: 10/500, Iter: 8/119 -- train_loss: 1.1751 
2025-08-10 16:58:39,333 - INFO - Epoch: 10/500, Iter: 9/119 -- train_loss: 1.1877 
2025-08-10 16:58:39,616 - INFO - Epoch: 10/500, Iter: 10/119 -- train_loss: 1.1870 
2025-08-10 16:58:39,923 - INFO - Epoch: 10/500, Iter: 11/119 -- train_loss: 1.1872 
2025-08-10 16:58:40,243 - INFO - Epoch: 10/500, Iter: 12/119 -- train_loss: 1.1827 
2025-08-10 16:58:40,550 - INFO - Epoch: 10/500, Iter: 13/119 -- train_loss: 1.1506 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:02:12,501 - INFO - Epoch: 11/500, Iter: 1/119 -- train_loss: 1.1484 


[1/119]   1%|           [00:00<?]

2025-08-10 17:02:15,404 - INFO - Epoch: 11/500, Iter: 2/119 -- train_loss: 1.1859 
2025-08-10 17:02:15,715 - INFO - Epoch: 11/500, Iter: 3/119 -- train_loss: 1.1755 
2025-08-10 17:02:18,503 - INFO - Epoch: 11/500, Iter: 4/119 -- train_loss: 1.1870 
2025-08-10 17:02:18,757 - INFO - Epoch: 11/500, Iter: 5/119 -- train_loss: 1.1739 
2025-08-10 17:02:19,034 - INFO - Epoch: 11/500, Iter: 6/119 -- train_loss: 1.0132 
2025-08-10 17:02:19,361 - INFO - Epoch: 11/500, Iter: 7/119 -- train_loss: 1.1886 
2025-08-10 17:02:19,650 - INFO - Epoch: 11/500, Iter: 8/119 -- train_loss: 1.1776 
2025-08-10 17:02:23,175 - INFO - Epoch: 11/500, Iter: 9/119 -- train_loss: 1.1700 
2025-08-10 17:02:23,510 - INFO - Epoch: 11/500, Iter: 10/119 -- train_loss: 1.1875 
2025-08-10 17:02:23,789 - INFO - Epoch: 11/500, Iter: 11/119 -- train_loss: 1.1814 
2025-08-10 17:02:24,071 - INFO - Epoch: 11/500, Iter: 12/119 -- train_loss: 1.1730 
2025-08-10 17:02:24,385 - INFO - Epoch: 11/500, Iter: 13/119 -- train_loss: 1.0749 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:06:02,593 - INFO - Epoch: 12/500, Iter: 1/119 -- train_loss: 1.1856 


[1/119]   1%|           [00:00<?]

2025-08-10 17:06:02,932 - INFO - Epoch: 12/500, Iter: 2/119 -- train_loss: 1.1848 
2025-08-10 17:06:03,216 - INFO - Epoch: 12/500, Iter: 3/119 -- train_loss: 1.1727 
2025-08-10 17:06:03,533 - INFO - Epoch: 12/500, Iter: 4/119 -- train_loss: 1.1805 
2025-08-10 17:06:03,842 - INFO - Epoch: 12/500, Iter: 5/119 -- train_loss: 1.1484 
2025-08-10 17:06:04,162 - INFO - Epoch: 12/500, Iter: 6/119 -- train_loss: 1.1853 
2025-08-10 17:06:04,495 - INFO - Epoch: 12/500, Iter: 7/119 -- train_loss: 1.1723 
2025-08-10 17:06:04,819 - INFO - Epoch: 12/500, Iter: 8/119 -- train_loss: 1.1688 
2025-08-10 17:06:11,567 - INFO - Epoch: 12/500, Iter: 9/119 -- train_loss: 1.1754 
2025-08-10 17:06:11,844 - INFO - Epoch: 12/500, Iter: 10/119 -- train_loss: 1.1842 
2025-08-10 17:06:12,108 - INFO - Epoch: 12/500, Iter: 11/119 -- train_loss: 1.1848 
2025-08-10 17:06:12,426 - INFO - Epoch: 12/500, Iter: 12/119 -- train_loss: 1.1850 
2025-08-10 17:06:12,703 - INFO - Epoch: 12/500, Iter: 13/119 -- train_loss: 1.1808 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:09:44,932 - INFO - Epoch: 13/500, Iter: 1/119 -- train_loss: 1.1495 


[1/119]   1%|           [00:00<?]

2025-08-10 17:09:45,241 - INFO - Epoch: 13/500, Iter: 2/119 -- train_loss: 1.1832 
2025-08-10 17:09:45,574 - INFO - Epoch: 13/500, Iter: 3/119 -- train_loss: 1.1855 
2025-08-10 17:09:45,896 - INFO - Epoch: 13/500, Iter: 4/119 -- train_loss: 1.1867 
2025-08-10 17:09:46,227 - INFO - Epoch: 13/500, Iter: 5/119 -- train_loss: 1.1843 
2025-08-10 17:09:46,549 - INFO - Epoch: 13/500, Iter: 6/119 -- train_loss: 1.1672 
2025-08-10 17:09:46,855 - INFO - Epoch: 13/500, Iter: 7/119 -- train_loss: 1.1832 
2025-08-10 17:09:47,194 - INFO - Epoch: 13/500, Iter: 8/119 -- train_loss: 1.1859 
2025-08-10 17:09:54,940 - INFO - Epoch: 13/500, Iter: 9/119 -- train_loss: 1.1859 
2025-08-10 17:09:55,225 - INFO - Epoch: 13/500, Iter: 10/119 -- train_loss: 1.1588 
2025-08-10 17:09:55,516 - INFO - Epoch: 13/500, Iter: 11/119 -- train_loss: 1.1844 
2025-08-10 17:09:55,826 - INFO - Epoch: 13/500, Iter: 12/119 -- train_loss: 1.1468 
2025-08-10 17:09:56,155 - INFO - Epoch: 13/500, Iter: 13/119 -- train_loss: 1.1727 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:13:30,831 - INFO - Epoch: 14/500, Iter: 1/119 -- train_loss: 1.1868 


[1/119]   1%|           [00:00<?]

2025-08-10 17:13:34,061 - INFO - Epoch: 14/500, Iter: 2/119 -- train_loss: 1.1160 
2025-08-10 17:13:34,371 - INFO - Epoch: 14/500, Iter: 3/119 -- train_loss: 1.1846 
2025-08-10 17:13:34,702 - INFO - Epoch: 14/500, Iter: 4/119 -- train_loss: 1.1764 
2025-08-10 17:13:34,986 - INFO - Epoch: 14/500, Iter: 5/119 -- train_loss: 1.1617 
2025-08-10 17:13:35,281 - INFO - Epoch: 14/500, Iter: 6/119 -- train_loss: 1.1822 
2025-08-10 17:13:35,587 - INFO - Epoch: 14/500, Iter: 7/119 -- train_loss: 1.1837 
2025-08-10 17:13:35,888 - INFO - Epoch: 14/500, Iter: 8/119 -- train_loss: 1.1872 
2025-08-10 17:13:36,219 - INFO - Epoch: 14/500, Iter: 9/119 -- train_loss: 1.1843 
2025-08-10 17:13:39,131 - INFO - Epoch: 14/500, Iter: 10/119 -- train_loss: 1.1829 
2025-08-10 17:13:40,269 - INFO - Epoch: 14/500, Iter: 11/119 -- train_loss: 1.1842 
2025-08-10 17:13:40,571 - INFO - Epoch: 14/500, Iter: 12/119 -- train_loss: 1.1841 
2025-08-10 17:13:40,900 - INFO - Epoch: 14/500, Iter: 13/119 -- train_loss: 1.1778 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:17:12,804 - INFO - Epoch: 15/500, Iter: 1/119 -- train_loss: 1.1823 


[1/119]   1%|           [00:00<?]

2025-08-10 17:17:16,361 - INFO - Epoch: 15/500, Iter: 2/119 -- train_loss: 1.1817 
2025-08-10 17:17:16,641 - INFO - Epoch: 15/500, Iter: 3/119 -- train_loss: 1.1811 
2025-08-10 17:17:16,947 - INFO - Epoch: 15/500, Iter: 4/119 -- train_loss: 1.1816 
2025-08-10 17:17:17,254 - INFO - Epoch: 15/500, Iter: 5/119 -- train_loss: 1.1608 
2025-08-10 17:17:19,714 - INFO - Epoch: 15/500, Iter: 6/119 -- train_loss: 1.1331 
2025-08-10 17:17:20,073 - INFO - Epoch: 15/500, Iter: 7/119 -- train_loss: 1.0901 
2025-08-10 17:17:21,604 - INFO - Epoch: 15/500, Iter: 8/119 -- train_loss: 1.0807 
2025-08-10 17:17:21,920 - INFO - Epoch: 15/500, Iter: 9/119 -- train_loss: 1.1839 
2025-08-10 17:17:22,235 - INFO - Epoch: 15/500, Iter: 10/119 -- train_loss: 1.1826 
2025-08-10 17:17:22,544 - INFO - Epoch: 15/500, Iter: 11/119 -- train_loss: 1.1824 
2025-08-10 17:17:22,859 - INFO - Epoch: 15/500, Iter: 12/119 -- train_loss: 1.1782 
2025-08-10 17:17:23,207 - INFO - Epoch: 15/500, Iter: 13/119 -- train_loss: 1.1743 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:21:01,138 - INFO - Epoch: 16/500, Iter: 1/119 -- train_loss: 1.1826 


[1/119]   1%|           [00:00<?]

2025-08-10 17:21:04,309 - INFO - Epoch: 16/500, Iter: 2/119 -- train_loss: 1.1826 
2025-08-10 17:21:04,599 - INFO - Epoch: 16/500, Iter: 3/119 -- train_loss: 1.1786 
2025-08-10 17:21:04,926 - INFO - Epoch: 16/500, Iter: 4/119 -- train_loss: 1.1578 
2025-08-10 17:21:05,228 - INFO - Epoch: 16/500, Iter: 5/119 -- train_loss: 1.1815 
2025-08-10 17:21:05,493 - INFO - Epoch: 16/500, Iter: 6/119 -- train_loss: 1.1388 
2025-08-10 17:21:05,820 - INFO - Epoch: 16/500, Iter: 7/119 -- train_loss: 1.1817 
2025-08-10 17:21:06,128 - INFO - Epoch: 16/500, Iter: 8/119 -- train_loss: 1.1699 
2025-08-10 17:21:11,330 - INFO - Epoch: 16/500, Iter: 9/119 -- train_loss: 1.1813 
2025-08-10 17:21:11,665 - INFO - Epoch: 16/500, Iter: 10/119 -- train_loss: 1.1771 
2025-08-10 17:21:11,936 - INFO - Epoch: 16/500, Iter: 11/119 -- train_loss: 1.1678 
2025-08-10 17:21:12,258 - INFO - Epoch: 16/500, Iter: 12/119 -- train_loss: 1.1608 
2025-08-10 17:21:12,551 - INFO - Epoch: 16/500, Iter: 13/119 -- train_loss: 1.1631 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:24:59,648 - INFO - Epoch: 17/500, Iter: 1/119 -- train_loss: 1.1820 


[1/119]   1%|           [00:00<?]

2025-08-10 17:24:59,923 - INFO - Epoch: 17/500, Iter: 2/119 -- train_loss: 1.1466 
2025-08-10 17:25:00,231 - INFO - Epoch: 17/500, Iter: 3/119 -- train_loss: 1.1740 
2025-08-10 17:25:00,536 - INFO - Epoch: 17/500, Iter: 4/119 -- train_loss: 1.1714 
2025-08-10 17:25:00,889 - INFO - Epoch: 17/500, Iter: 5/119 -- train_loss: 1.1831 
2025-08-10 17:25:01,184 - INFO - Epoch: 17/500, Iter: 6/119 -- train_loss: 1.1687 
2025-08-10 17:25:01,465 - INFO - Epoch: 17/500, Iter: 7/119 -- train_loss: 1.1090 
2025-08-10 17:25:01,766 - INFO - Epoch: 17/500, Iter: 8/119 -- train_loss: 1.1485 
2025-08-10 17:25:04,181 - INFO - Epoch: 17/500, Iter: 9/119 -- train_loss: 1.1858 
2025-08-10 17:25:04,492 - INFO - Epoch: 17/500, Iter: 10/119 -- train_loss: 1.1530 
2025-08-10 17:25:05,870 - INFO - Epoch: 17/500, Iter: 11/119 -- train_loss: 1.1806 
2025-08-10 17:25:06,159 - INFO - Epoch: 17/500, Iter: 12/119 -- train_loss: 1.1791 
2025-08-10 17:25:11,135 - INFO - Epoch: 17/500, Iter: 13/119 -- train_loss: 1.1505 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:28:47,960 - INFO - Epoch: 18/500, Iter: 1/119 -- train_loss: 1.1812 


[1/119]   1%|           [00:00<?]

2025-08-10 17:28:48,276 - INFO - Epoch: 18/500, Iter: 2/119 -- train_loss: 1.1797 
2025-08-10 17:28:49,396 - INFO - Epoch: 18/500, Iter: 3/119 -- train_loss: 1.1823 
2025-08-10 17:28:49,702 - INFO - Epoch: 18/500, Iter: 4/119 -- train_loss: 1.1810 
2025-08-10 17:28:50,379 - INFO - Epoch: 18/500, Iter: 5/119 -- train_loss: 1.1468 
2025-08-10 17:28:51,609 - INFO - Epoch: 18/500, Iter: 6/119 -- train_loss: 1.1677 
2025-08-10 17:28:54,975 - INFO - Epoch: 18/500, Iter: 7/119 -- train_loss: 1.0909 
2025-08-10 17:28:55,293 - INFO - Epoch: 18/500, Iter: 8/119 -- train_loss: 1.1578 
2025-08-10 17:28:57,902 - INFO - Epoch: 18/500, Iter: 9/119 -- train_loss: 1.1845 
2025-08-10 17:28:58,223 - INFO - Epoch: 18/500, Iter: 10/119 -- train_loss: 1.1828 
2025-08-10 17:28:58,567 - INFO - Epoch: 18/500, Iter: 11/119 -- train_loss: 1.1669 
2025-08-10 17:28:58,877 - INFO - Epoch: 18/500, Iter: 12/119 -- train_loss: 1.1651 
2025-08-10 17:28:59,193 - INFO - Epoch: 18/500, Iter: 13/119 -- train_loss: 1.1476 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:32:36,107 - INFO - Epoch: 19/500, Iter: 1/119 -- train_loss: 1.0634 


[1/119]   1%|           [00:00<?]

2025-08-10 17:32:36,434 - INFO - Epoch: 19/500, Iter: 2/119 -- train_loss: 1.1690 
2025-08-10 17:32:36,712 - INFO - Epoch: 19/500, Iter: 3/119 -- train_loss: 1.1598 
2025-08-10 17:32:41,120 - INFO - Epoch: 19/500, Iter: 4/119 -- train_loss: 1.1813 
2025-08-10 17:32:41,416 - INFO - Epoch: 19/500, Iter: 5/119 -- train_loss: 1.1756 
2025-08-10 17:32:41,705 - INFO - Epoch: 19/500, Iter: 6/119 -- train_loss: 1.1772 
2025-08-10 17:32:42,019 - INFO - Epoch: 19/500, Iter: 7/119 -- train_loss: 1.1832 
2025-08-10 17:32:42,320 - INFO - Epoch: 19/500, Iter: 8/119 -- train_loss: 1.1546 
2025-08-10 17:32:42,934 - INFO - Epoch: 19/500, Iter: 9/119 -- train_loss: 1.1772 
2025-08-10 17:32:43,259 - INFO - Epoch: 19/500, Iter: 10/119 -- train_loss: 1.1716 
2025-08-10 17:32:44,686 - INFO - Epoch: 19/500, Iter: 11/119 -- train_loss: 1.1569 
2025-08-10 17:32:46,858 - INFO - Epoch: 19/500, Iter: 12/119 -- train_loss: 1.1859 
2025-08-10 17:32:47,197 - INFO - Epoch: 19/500, Iter: 13/119 -- train_loss: 1.1807 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:36:30,501 - INFO - Epoch: 20/500, Iter: 1/119 -- train_loss: 1.1675 


[1/119]   1%|           [00:00<?]

2025-08-10 17:36:30,826 - INFO - Epoch: 20/500, Iter: 2/119 -- train_loss: 1.1728 
2025-08-10 17:36:31,135 - INFO - Epoch: 20/500, Iter: 3/119 -- train_loss: 1.1382 
2025-08-10 17:36:31,402 - INFO - Epoch: 20/500, Iter: 4/119 -- train_loss: 1.1805 
2025-08-10 17:36:31,718 - INFO - Epoch: 20/500, Iter: 5/119 -- train_loss: 1.1805 
2025-08-10 17:36:32,032 - INFO - Epoch: 20/500, Iter: 6/119 -- train_loss: 1.1579 
2025-08-10 17:36:33,462 - INFO - Epoch: 20/500, Iter: 7/119 -- train_loss: 1.1874 
2025-08-10 17:36:33,741 - INFO - Epoch: 20/500, Iter: 8/119 -- train_loss: 1.1724 
2025-08-10 17:36:35,521 - INFO - Epoch: 20/500, Iter: 9/119 -- train_loss: 1.1815 
2025-08-10 17:36:35,836 - INFO - Epoch: 20/500, Iter: 10/119 -- train_loss: 1.1812 
2025-08-10 17:36:36,158 - INFO - Epoch: 20/500, Iter: 11/119 -- train_loss: 1.1813 
2025-08-10 17:36:36,535 - INFO - Epoch: 20/500, Iter: 12/119 -- train_loss: 1.1817 
2025-08-10 17:36:36,841 - INFO - Epoch: 20/500, Iter: 13/119 -- train_loss: 1.1822 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:40:27,574 - INFO - Epoch: 21/500, Iter: 1/119 -- train_loss: 1.1797 


[1/119]   1%|           [00:00<?]

2025-08-10 17:40:27,895 - INFO - Epoch: 21/500, Iter: 2/119 -- train_loss: 1.1617 
2025-08-10 17:40:28,231 - INFO - Epoch: 21/500, Iter: 3/119 -- train_loss: 1.1812 
2025-08-10 17:40:28,550 - INFO - Epoch: 21/500, Iter: 4/119 -- train_loss: 1.1800 
2025-08-10 17:40:28,868 - INFO - Epoch: 21/500, Iter: 5/119 -- train_loss: 1.1836 
2025-08-10 17:40:29,180 - INFO - Epoch: 21/500, Iter: 6/119 -- train_loss: 1.1734 
2025-08-10 17:40:32,279 - INFO - Epoch: 21/500, Iter: 7/119 -- train_loss: 1.1519 
2025-08-10 17:40:32,565 - INFO - Epoch: 21/500, Iter: 8/119 -- train_loss: 1.1596 
2025-08-10 17:40:32,860 - INFO - Epoch: 21/500, Iter: 9/119 -- train_loss: 1.1481 
2025-08-10 17:40:33,174 - INFO - Epoch: 21/500, Iter: 10/119 -- train_loss: 1.1403 
2025-08-10 17:40:36,541 - INFO - Epoch: 21/500, Iter: 11/119 -- train_loss: 1.1762 
2025-08-10 17:40:36,862 - INFO - Epoch: 21/500, Iter: 12/119 -- train_loss: 1.1850 
2025-08-10 17:40:37,158 - INFO - Epoch: 21/500, Iter: 13/119 -- train_loss: 1.1389 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:44:10,973 - INFO - Epoch: 22/500, Iter: 1/119 -- train_loss: 1.1549 


[1/119]   1%|           [00:00<?]

2025-08-10 17:44:12,467 - INFO - Epoch: 22/500, Iter: 2/119 -- train_loss: 1.1465 
2025-08-10 17:44:13,741 - INFO - Epoch: 22/500, Iter: 3/119 -- train_loss: 1.1242 
2025-08-10 17:44:17,286 - INFO - Epoch: 22/500, Iter: 4/119 -- train_loss: 1.1833 
2025-08-10 17:44:17,611 - INFO - Epoch: 22/500, Iter: 5/119 -- train_loss: 1.1817 
2025-08-10 17:44:17,937 - INFO - Epoch: 22/500, Iter: 6/119 -- train_loss: 1.1589 
2025-08-10 17:44:18,236 - INFO - Epoch: 22/500, Iter: 7/119 -- train_loss: 1.1787 
2025-08-10 17:44:18,559 - INFO - Epoch: 22/500, Iter: 8/119 -- train_loss: 1.1755 
2025-08-10 17:44:19,207 - INFO - Epoch: 22/500, Iter: 9/119 -- train_loss: 1.1788 
2025-08-10 17:44:19,522 - INFO - Epoch: 22/500, Iter: 10/119 -- train_loss: 1.1756 
2025-08-10 17:44:22,436 - INFO - Epoch: 22/500, Iter: 11/119 -- train_loss: 1.1803 
2025-08-10 17:44:23,573 - INFO - Epoch: 22/500, Iter: 12/119 -- train_loss: 1.1745 
2025-08-10 17:44:23,840 - INFO - Epoch: 22/500, Iter: 13/119 -- train_loss: 1.1701 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:47:58,484 - INFO - Epoch: 23/500, Iter: 1/119 -- train_loss: 1.1497 


[1/119]   1%|           [00:00<?]

2025-08-10 17:47:58,820 - INFO - Epoch: 23/500, Iter: 2/119 -- train_loss: 1.1784 
2025-08-10 17:47:59,097 - INFO - Epoch: 23/500, Iter: 3/119 -- train_loss: 1.1818 
2025-08-10 17:47:59,361 - INFO - Epoch: 23/500, Iter: 4/119 -- train_loss: 1.1796 
2025-08-10 17:47:59,670 - INFO - Epoch: 23/500, Iter: 5/119 -- train_loss: 1.1545 
2025-08-10 17:47:59,990 - INFO - Epoch: 23/500, Iter: 6/119 -- train_loss: 1.1748 
2025-08-10 17:48:00,361 - INFO - Epoch: 23/500, Iter: 7/119 -- train_loss: 1.0368 
2025-08-10 17:48:00,651 - INFO - Epoch: 23/500, Iter: 8/119 -- train_loss: 1.1784 
2025-08-10 17:48:04,645 - INFO - Epoch: 23/500, Iter: 9/119 -- train_loss: 1.1724 
2025-08-10 17:48:04,964 - INFO - Epoch: 23/500, Iter: 10/119 -- train_loss: 1.1762 
2025-08-10 17:48:05,248 - INFO - Epoch: 23/500, Iter: 11/119 -- train_loss: 1.1794 
2025-08-10 17:48:05,556 - INFO - Epoch: 23/500, Iter: 12/119 -- train_loss: 1.1786 
2025-08-10 17:48:05,854 - INFO - Epoch: 23/500, Iter: 13/119 -- train_loss: 1.1706 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:51:47,795 - INFO - Epoch: 24/500, Iter: 1/119 -- train_loss: 1.1777 


[1/119]   1%|           [00:00<?]

2025-08-10 17:51:48,102 - INFO - Epoch: 24/500, Iter: 2/119 -- train_loss: 1.1125 
2025-08-10 17:51:48,366 - INFO - Epoch: 24/500, Iter: 3/119 -- train_loss: 1.1809 
2025-08-10 17:51:48,680 - INFO - Epoch: 24/500, Iter: 4/119 -- train_loss: 1.1316 
2025-08-10 17:51:49,017 - INFO - Epoch: 24/500, Iter: 5/119 -- train_loss: 1.1722 
2025-08-10 17:51:49,313 - INFO - Epoch: 24/500, Iter: 6/119 -- train_loss: 1.1805 
2025-08-10 17:51:49,621 - INFO - Epoch: 24/500, Iter: 7/119 -- train_loss: 1.1294 
2025-08-10 17:51:49,910 - INFO - Epoch: 24/500, Iter: 8/119 -- train_loss: 1.1818 
2025-08-10 17:51:53,474 - INFO - Epoch: 24/500, Iter: 9/119 -- train_loss: 1.1680 
2025-08-10 17:51:53,808 - INFO - Epoch: 24/500, Iter: 10/119 -- train_loss: 1.1801 
2025-08-10 17:51:54,094 - INFO - Epoch: 24/500, Iter: 11/119 -- train_loss: 1.1807 
2025-08-10 17:51:54,922 - INFO - Epoch: 24/500, Iter: 12/119 -- train_loss: 1.1294 
2025-08-10 17:51:55,238 - INFO - Epoch: 24/500, Iter: 13/119 -- train_loss: 1.1785 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:55:30,091 - INFO - Epoch: 25/500, Iter: 1/119 -- train_loss: 1.1606 


[1/119]   1%|           [00:00<?]

2025-08-10 17:55:33,700 - INFO - Epoch: 25/500, Iter: 2/119 -- train_loss: 1.1656 
2025-08-10 17:55:34,007 - INFO - Epoch: 25/500, Iter: 3/119 -- train_loss: 1.0716 
2025-08-10 17:55:34,300 - INFO - Epoch: 25/500, Iter: 4/119 -- train_loss: 1.1635 
2025-08-10 17:55:34,622 - INFO - Epoch: 25/500, Iter: 5/119 -- train_loss: 1.1779 
2025-08-10 17:55:34,902 - INFO - Epoch: 25/500, Iter: 6/119 -- train_loss: 1.1429 
2025-08-10 17:55:35,237 - INFO - Epoch: 25/500, Iter: 7/119 -- train_loss: 1.1800 
2025-08-10 17:55:35,576 - INFO - Epoch: 25/500, Iter: 8/119 -- train_loss: 1.1561 
2025-08-10 17:55:35,920 - INFO - Epoch: 25/500, Iter: 9/119 -- train_loss: 1.1489 
2025-08-10 17:55:42,545 - INFO - Epoch: 25/500, Iter: 10/119 -- train_loss: 1.1781 
2025-08-10 17:55:42,835 - INFO - Epoch: 25/500, Iter: 11/119 -- train_loss: 1.1722 
2025-08-10 17:55:43,140 - INFO - Epoch: 25/500, Iter: 12/119 -- train_loss: 1.1813 
2025-08-10 17:55:43,427 - INFO - Epoch: 25/500, Iter: 13/119 -- train_loss: 1.1755 


[1/20]   5%|5          [00:00<?]

2025-08-10 17:59:26,057 - INFO - Epoch: 26/500, Iter: 1/119 -- train_loss: 1.1716 


[1/119]   1%|           [00:00<?]

2025-08-10 17:59:26,367 - INFO - Epoch: 26/500, Iter: 2/119 -- train_loss: 1.1796 
2025-08-10 17:59:26,689 - INFO - Epoch: 26/500, Iter: 3/119 -- train_loss: 1.1841 
2025-08-10 17:59:29,245 - INFO - Epoch: 26/500, Iter: 4/119 -- train_loss: 1.1755 
2025-08-10 17:59:29,564 - INFO - Epoch: 26/500, Iter: 5/119 -- train_loss: 1.1811 
2025-08-10 17:59:29,874 - INFO - Epoch: 26/500, Iter: 6/119 -- train_loss: 1.1790 
2025-08-10 17:59:30,178 - INFO - Epoch: 26/500, Iter: 7/119 -- train_loss: 1.1738 
2025-08-10 17:59:30,466 - INFO - Epoch: 26/500, Iter: 8/119 -- train_loss: 1.1795 
2025-08-10 17:59:37,270 - INFO - Epoch: 26/500, Iter: 9/119 -- train_loss: 1.1839 
2025-08-10 17:59:37,574 - INFO - Epoch: 26/500, Iter: 10/119 -- train_loss: 1.1796 
2025-08-10 17:59:37,905 - INFO - Epoch: 26/500, Iter: 11/119 -- train_loss: 1.1566 
2025-08-10 17:59:38,593 - INFO - Epoch: 26/500, Iter: 12/119 -- train_loss: 1.1671 
2025-08-10 17:59:41,769 - INFO - Epoch: 26/500, Iter: 13/119 -- train_loss: 1.1576 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:03:08,034 - INFO - Epoch: 27/500, Iter: 1/119 -- train_loss: 1.1781 


[1/119]   1%|           [00:00<?]

2025-08-10 18:03:08,331 - INFO - Epoch: 27/500, Iter: 2/119 -- train_loss: 1.1702 
2025-08-10 18:03:15,194 - INFO - Epoch: 27/500, Iter: 3/119 -- train_loss: 1.1656 
2025-08-10 18:03:15,460 - INFO - Epoch: 27/500, Iter: 4/119 -- train_loss: 1.1796 
2025-08-10 18:03:15,742 - INFO - Epoch: 27/500, Iter: 5/119 -- train_loss: 1.1801 
2025-08-10 18:03:16,068 - INFO - Epoch: 27/500, Iter: 6/119 -- train_loss: 1.1532 
2025-08-10 18:03:16,362 - INFO - Epoch: 27/500, Iter: 7/119 -- train_loss: 1.1483 
2025-08-10 18:03:16,675 - INFO - Epoch: 27/500, Iter: 8/119 -- train_loss: 1.1721 
2025-08-10 18:03:16,989 - INFO - Epoch: 27/500, Iter: 9/119 -- train_loss: 1.0629 
2025-08-10 18:03:17,294 - INFO - Epoch: 27/500, Iter: 10/119 -- train_loss: 1.1608 
2025-08-10 18:03:22,560 - INFO - Epoch: 27/500, Iter: 11/119 -- train_loss: 1.1540 
2025-08-10 18:03:22,837 - INFO - Epoch: 27/500, Iter: 12/119 -- train_loss: 1.1776 
2025-08-10 18:03:23,139 - INFO - Epoch: 27/500, Iter: 13/119 -- train_loss: 1.1786 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:06:55,570 - INFO - Epoch: 28/500, Iter: 1/119 -- train_loss: 1.0598 


[1/119]   1%|           [00:00<?]

2025-08-10 18:06:56,216 - INFO - Epoch: 28/500, Iter: 2/119 -- train_loss: 1.1612 
2025-08-10 18:06:58,939 - INFO - Epoch: 28/500, Iter: 3/119 -- train_loss: 1.1487 
2025-08-10 18:06:59,251 - INFO - Epoch: 28/500, Iter: 4/119 -- train_loss: 1.1799 
2025-08-10 18:06:59,545 - INFO - Epoch: 28/500, Iter: 5/119 -- train_loss: 1.1376 
2025-08-10 18:07:00,871 - INFO - Epoch: 28/500, Iter: 6/119 -- train_loss: 1.1724 
2025-08-10 18:07:01,133 - INFO - Epoch: 28/500, Iter: 7/119 -- train_loss: 1.1621 
2025-08-10 18:07:01,434 - INFO - Epoch: 28/500, Iter: 8/119 -- train_loss: 1.1330 
2025-08-10 18:07:01,785 - INFO - Epoch: 28/500, Iter: 9/119 -- train_loss: 1.1226 
2025-08-10 18:07:02,451 - INFO - Epoch: 28/500, Iter: 10/119 -- train_loss: 1.0267 
2025-08-10 18:07:04,381 - INFO - Epoch: 28/500, Iter: 11/119 -- train_loss: 1.1122 
2025-08-10 18:07:04,721 - INFO - Epoch: 28/500, Iter: 12/119 -- train_loss: 1.1834 
2025-08-10 18:07:05,022 - INFO - Epoch: 28/500, Iter: 13/119 -- train_loss: 1.1065 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:10:45,893 - INFO - Epoch: 29/500, Iter: 1/119 -- train_loss: 1.1555 


[1/119]   1%|           [00:00<?]

2025-08-10 18:10:46,211 - INFO - Epoch: 29/500, Iter: 2/119 -- train_loss: 1.1780 
2025-08-10 18:10:46,492 - INFO - Epoch: 29/500, Iter: 3/119 -- train_loss: 1.1404 
2025-08-10 18:10:46,775 - INFO - Epoch: 29/500, Iter: 4/119 -- train_loss: 1.1564 
2025-08-10 18:10:47,080 - INFO - Epoch: 29/500, Iter: 5/119 -- train_loss: 1.1513 
2025-08-10 18:10:47,369 - INFO - Epoch: 29/500, Iter: 6/119 -- train_loss: 1.1579 
2025-08-10 18:10:47,684 - INFO - Epoch: 29/500, Iter: 7/119 -- train_loss: 1.1865 
2025-08-10 18:10:47,959 - INFO - Epoch: 29/500, Iter: 8/119 -- train_loss: 1.1632 
2025-08-10 18:10:54,429 - INFO - Epoch: 29/500, Iter: 9/119 -- train_loss: 1.1701 
2025-08-10 18:10:54,714 - INFO - Epoch: 29/500, Iter: 10/119 -- train_loss: 1.1680 
2025-08-10 18:10:54,992 - INFO - Epoch: 29/500, Iter: 11/119 -- train_loss: 1.1768 
2025-08-10 18:10:55,280 - INFO - Epoch: 29/500, Iter: 12/119 -- train_loss: 1.1769 
2025-08-10 18:10:55,542 - INFO - Epoch: 29/500, Iter: 13/119 -- train_loss: 1.1784 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:14:41,767 - INFO - Epoch: 30/500, Iter: 1/119 -- train_loss: 1.1564 


[1/119]   1%|           [00:00<?]

2025-08-10 18:14:42,069 - INFO - Epoch: 30/500, Iter: 2/119 -- train_loss: 1.1767 
2025-08-10 18:14:42,366 - INFO - Epoch: 30/500, Iter: 3/119 -- train_loss: 1.1699 
2025-08-10 18:14:42,655 - INFO - Epoch: 30/500, Iter: 4/119 -- train_loss: 1.1675 
2025-08-10 18:14:42,934 - INFO - Epoch: 30/500, Iter: 5/119 -- train_loss: 1.1477 
2025-08-10 18:14:43,251 - INFO - Epoch: 30/500, Iter: 6/119 -- train_loss: 1.1799 
2025-08-10 18:14:43,542 - INFO - Epoch: 30/500, Iter: 7/119 -- train_loss: 1.1789 
2025-08-10 18:14:43,854 - INFO - Epoch: 30/500, Iter: 8/119 -- train_loss: 1.1321 
2025-08-10 18:14:46,647 - INFO - Epoch: 30/500, Iter: 9/119 -- train_loss: 1.1283 
2025-08-10 18:14:46,964 - INFO - Epoch: 30/500, Iter: 10/119 -- train_loss: 1.1776 
2025-08-10 18:14:47,266 - INFO - Epoch: 30/500, Iter: 11/119 -- train_loss: 1.1402 
2025-08-10 18:14:47,549 - INFO - Epoch: 30/500, Iter: 12/119 -- train_loss: 1.1766 
2025-08-10 18:14:47,830 - INFO - Epoch: 30/500, Iter: 13/119 -- train_loss: 1.1655 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:18:17,341 - INFO - Epoch: 31/500, Iter: 1/119 -- train_loss: 1.1616 


[1/119]   1%|           [00:00<?]

2025-08-10 18:18:17,682 - INFO - Epoch: 31/500, Iter: 2/119 -- train_loss: 1.1458 
2025-08-10 18:18:17,985 - INFO - Epoch: 31/500, Iter: 3/119 -- train_loss: 1.1781 
2025-08-10 18:18:18,299 - INFO - Epoch: 31/500, Iter: 4/119 -- train_loss: 1.0921 
2025-08-10 18:18:18,585 - INFO - Epoch: 31/500, Iter: 5/119 -- train_loss: 1.1600 
2025-08-10 18:18:19,889 - INFO - Epoch: 31/500, Iter: 6/119 -- train_loss: 1.0820 
2025-08-10 18:18:20,215 - INFO - Epoch: 31/500, Iter: 7/119 -- train_loss: 1.1692 
2025-08-10 18:18:20,508 - INFO - Epoch: 31/500, Iter: 8/119 -- train_loss: 1.1651 
2025-08-10 18:18:21,746 - INFO - Epoch: 31/500, Iter: 9/119 -- train_loss: 1.1772 
2025-08-10 18:18:23,948 - INFO - Epoch: 31/500, Iter: 10/119 -- train_loss: 1.1813 
2025-08-10 18:18:24,268 - INFO - Epoch: 31/500, Iter: 11/119 -- train_loss: 1.1793 
2025-08-10 18:18:24,571 - INFO - Epoch: 31/500, Iter: 12/119 -- train_loss: 1.1285 
2025-08-10 18:18:24,831 - INFO - Epoch: 31/500, Iter: 13/119 -- train_loss: 1.1705 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:22:02,025 - INFO - Epoch: 32/500, Iter: 1/119 -- train_loss: 1.1777 


[1/119]   1%|           [00:00<?]

2025-08-10 18:22:03,659 - INFO - Epoch: 32/500, Iter: 2/119 -- train_loss: 1.1680 
2025-08-10 18:22:03,958 - INFO - Epoch: 32/500, Iter: 3/119 -- train_loss: 1.1507 
2025-08-10 18:22:04,268 - INFO - Epoch: 32/500, Iter: 4/119 -- train_loss: 1.1770 
2025-08-10 18:22:04,599 - INFO - Epoch: 32/500, Iter: 5/119 -- train_loss: 1.1767 
2025-08-10 18:22:04,926 - INFO - Epoch: 32/500, Iter: 6/119 -- train_loss: 1.1542 
2025-08-10 18:22:05,252 - INFO - Epoch: 32/500, Iter: 7/119 -- train_loss: 1.1719 
2025-08-10 18:22:05,769 - INFO - Epoch: 32/500, Iter: 8/119 -- train_loss: 1.1767 
2025-08-10 18:22:11,214 - INFO - Epoch: 32/500, Iter: 9/119 -- train_loss: 1.1686 
2025-08-10 18:22:11,518 - INFO - Epoch: 32/500, Iter: 10/119 -- train_loss: 1.1769 
2025-08-10 18:22:11,823 - INFO - Epoch: 32/500, Iter: 11/119 -- train_loss: 1.1341 
2025-08-10 18:22:12,120 - INFO - Epoch: 32/500, Iter: 12/119 -- train_loss: 1.1729 
2025-08-10 18:22:12,446 - INFO - Epoch: 32/500, Iter: 13/119 -- train_loss: 1.1762 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:26:08,606 - INFO - Epoch: 33/500, Iter: 1/119 -- train_loss: 1.1769 


[1/119]   1%|           [00:00<?]

2025-08-10 18:26:12,083 - INFO - Epoch: 33/500, Iter: 2/119 -- train_loss: 1.1465 
2025-08-10 18:26:12,347 - INFO - Epoch: 33/500, Iter: 3/119 -- train_loss: 1.1802 
2025-08-10 18:26:12,643 - INFO - Epoch: 33/500, Iter: 4/119 -- train_loss: 1.1518 
2025-08-10 18:26:12,959 - INFO - Epoch: 33/500, Iter: 5/119 -- train_loss: 1.1690 
2025-08-10 18:26:13,238 - INFO - Epoch: 33/500, Iter: 6/119 -- train_loss: 1.1790 
2025-08-10 18:26:13,558 - INFO - Epoch: 33/500, Iter: 7/119 -- train_loss: 1.1689 
2025-08-10 18:26:13,863 - INFO - Epoch: 33/500, Iter: 8/119 -- train_loss: 1.1302 
2025-08-10 18:26:17,261 - INFO - Epoch: 33/500, Iter: 9/119 -- train_loss: 1.1776 
2025-08-10 18:26:20,339 - INFO - Epoch: 33/500, Iter: 10/119 -- train_loss: 1.1485 
2025-08-10 18:26:20,659 - INFO - Epoch: 33/500, Iter: 11/119 -- train_loss: 1.1524 
2025-08-10 18:26:20,969 - INFO - Epoch: 33/500, Iter: 12/119 -- train_loss: 1.1851 
2025-08-10 18:26:21,263 - INFO - Epoch: 33/500, Iter: 13/119 -- train_loss: 1.1769 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:29:54,097 - INFO - Epoch: 34/500, Iter: 1/119 -- train_loss: 1.1790 


[1/119]   1%|           [00:00<?]

2025-08-10 18:29:54,404 - INFO - Epoch: 34/500, Iter: 2/119 -- train_loss: 1.1827 
2025-08-10 18:29:54,739 - INFO - Epoch: 34/500, Iter: 3/119 -- train_loss: 1.1825 
2025-08-10 18:29:55,033 - INFO - Epoch: 34/500, Iter: 4/119 -- train_loss: 1.1767 
2025-08-10 18:29:57,913 - INFO - Epoch: 34/500, Iter: 5/119 -- train_loss: 1.0971 
2025-08-10 18:29:58,209 - INFO - Epoch: 34/500, Iter: 6/119 -- train_loss: 1.0788 
2025-08-10 18:29:58,490 - INFO - Epoch: 34/500, Iter: 7/119 -- train_loss: 1.1809 
2025-08-10 18:29:58,840 - INFO - Epoch: 34/500, Iter: 8/119 -- train_loss: 1.0632 
2025-08-10 18:30:00,042 - INFO - Epoch: 34/500, Iter: 9/119 -- train_loss: 1.1205 
2025-08-10 18:30:06,680 - INFO - Epoch: 34/500, Iter: 10/119 -- train_loss: 1.1770 
2025-08-10 18:30:06,966 - INFO - Epoch: 34/500, Iter: 11/119 -- train_loss: 1.1773 
2025-08-10 18:30:07,313 - INFO - Epoch: 34/500, Iter: 12/119 -- train_loss: 1.1820 
2025-08-10 18:30:07,623 - INFO - Epoch: 34/500, Iter: 13/119 -- train_loss: 1.1787 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:33:48,861 - INFO - Epoch: 35/500, Iter: 1/119 -- train_loss: 1.1255 


[1/119]   1%|           [00:00<?]

2025-08-10 18:33:58,604 - INFO - Epoch: 35/500, Iter: 2/119 -- train_loss: 1.1734 
2025-08-10 18:33:58,859 - INFO - Epoch: 35/500, Iter: 3/119 -- train_loss: 1.1781 
2025-08-10 18:33:59,119 - INFO - Epoch: 35/500, Iter: 4/119 -- train_loss: 1.1764 
2025-08-10 18:33:59,399 - INFO - Epoch: 35/500, Iter: 5/119 -- train_loss: 1.1677 
2025-08-10 18:33:59,692 - INFO - Epoch: 35/500, Iter: 6/119 -- train_loss: 1.1668 
2025-08-10 18:34:00,004 - INFO - Epoch: 35/500, Iter: 7/119 -- train_loss: 1.1421 
2025-08-10 18:34:00,290 - INFO - Epoch: 35/500, Iter: 8/119 -- train_loss: 1.1749 
2025-08-10 18:34:00,591 - INFO - Epoch: 35/500, Iter: 9/119 -- train_loss: 1.1768 
2025-08-10 18:34:06,801 - INFO - Epoch: 35/500, Iter: 10/119 -- train_loss: 1.1711 
2025-08-10 18:34:07,067 - INFO - Epoch: 35/500, Iter: 11/119 -- train_loss: 1.1773 
2025-08-10 18:34:07,311 - INFO - Epoch: 35/500, Iter: 12/119 -- train_loss: 1.1126 
2025-08-10 18:34:07,583 - INFO - Epoch: 35/500, Iter: 13/119 -- train_loss: 1.1124 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:37:45,541 - INFO - Epoch: 36/500, Iter: 1/119 -- train_loss: 1.1260 


[1/119]   1%|           [00:00<?]

2025-08-10 18:37:51,207 - INFO - Epoch: 36/500, Iter: 2/119 -- train_loss: 1.1482 
2025-08-10 18:37:51,488 - INFO - Epoch: 36/500, Iter: 3/119 -- train_loss: 1.1489 
2025-08-10 18:37:51,752 - INFO - Epoch: 36/500, Iter: 4/119 -- train_loss: 1.1701 
2025-08-10 18:37:52,103 - INFO - Epoch: 36/500, Iter: 5/119 -- train_loss: 1.1712 
2025-08-10 18:37:52,401 - INFO - Epoch: 36/500, Iter: 6/119 -- train_loss: 1.1777 
2025-08-10 18:37:52,739 - INFO - Epoch: 36/500, Iter: 7/119 -- train_loss: 1.1811 
2025-08-10 18:37:53,024 - INFO - Epoch: 36/500, Iter: 8/119 -- train_loss: 1.1504 
2025-08-10 18:37:53,351 - INFO - Epoch: 36/500, Iter: 9/119 -- train_loss: 1.1469 
2025-08-10 18:37:57,743 - INFO - Epoch: 36/500, Iter: 10/119 -- train_loss: 1.1614 
2025-08-10 18:37:58,037 - INFO - Epoch: 36/500, Iter: 11/119 -- train_loss: 1.1791 
2025-08-10 18:37:58,343 - INFO - Epoch: 36/500, Iter: 12/119 -- train_loss: 1.1747 
2025-08-10 18:37:58,649 - INFO - Epoch: 36/500, Iter: 13/119 -- train_loss: 1.1529 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:41:31,151 - INFO - Epoch: 37/500, Iter: 1/119 -- train_loss: 1.1767 


[1/119]   1%|           [00:00<?]

2025-08-10 18:41:32,088 - INFO - Epoch: 37/500, Iter: 2/119 -- train_loss: 1.1795 
2025-08-10 18:41:32,796 - INFO - Epoch: 37/500, Iter: 3/119 -- train_loss: 1.1696 
2025-08-10 18:41:35,147 - INFO - Epoch: 37/500, Iter: 4/119 -- train_loss: 1.1801 
2025-08-10 18:41:37,169 - INFO - Epoch: 37/500, Iter: 5/119 -- train_loss: 1.1780 
2025-08-10 18:41:37,456 - INFO - Epoch: 37/500, Iter: 6/119 -- train_loss: 1.1782 
2025-08-10 18:41:37,752 - INFO - Epoch: 37/500, Iter: 7/119 -- train_loss: 1.1752 
2025-08-10 18:41:38,025 - INFO - Epoch: 37/500, Iter: 8/119 -- train_loss: 1.1093 
2025-08-10 18:41:44,724 - INFO - Epoch: 37/500, Iter: 9/119 -- train_loss: 1.1720 
2025-08-10 18:41:45,029 - INFO - Epoch: 37/500, Iter: 10/119 -- train_loss: 1.1369 
2025-08-10 18:41:45,319 - INFO - Epoch: 37/500, Iter: 11/119 -- train_loss: 1.1639 
2025-08-10 18:41:45,617 - INFO - Epoch: 37/500, Iter: 12/119 -- train_loss: 1.1807 
2025-08-10 18:41:45,972 - INFO - Epoch: 37/500, Iter: 13/119 -- train_loss: 1.1500 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:45:22,057 - INFO - Epoch: 38/500, Iter: 1/119 -- train_loss: 1.1269 


[1/119]   1%|           [00:00<?]

2025-08-10 18:45:23,641 - INFO - Epoch: 38/500, Iter: 2/119 -- train_loss: 1.1784 
2025-08-10 18:45:23,918 - INFO - Epoch: 38/500, Iter: 3/119 -- train_loss: 1.1760 
2025-08-10 18:45:26,703 - INFO - Epoch: 38/500, Iter: 4/119 -- train_loss: 1.1065 
2025-08-10 18:45:29,017 - INFO - Epoch: 38/500, Iter: 5/119 -- train_loss: 1.1572 
2025-08-10 18:45:29,317 - INFO - Epoch: 38/500, Iter: 6/119 -- train_loss: 1.1404 
2025-08-10 18:45:29,674 - INFO - Epoch: 38/500, Iter: 7/119 -- train_loss: 1.1342 
2025-08-10 18:45:29,972 - INFO - Epoch: 38/500, Iter: 8/119 -- train_loss: 1.1144 
2025-08-10 18:45:30,329 - INFO - Epoch: 38/500, Iter: 9/119 -- train_loss: 1.1695 
2025-08-10 18:45:30,608 - INFO - Epoch: 38/500, Iter: 10/119 -- train_loss: 1.1383 
2025-08-10 18:45:30,924 - INFO - Epoch: 38/500, Iter: 11/119 -- train_loss: 1.1801 
2025-08-10 18:45:33,000 - INFO - Epoch: 38/500, Iter: 12/119 -- train_loss: 1.1791 
2025-08-10 18:45:34,899 - INFO - Epoch: 38/500, Iter: 13/119 -- train_loss: 1.1790 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:49:05,364 - INFO - Epoch: 39/500, Iter: 1/119 -- train_loss: 1.1783 


[1/119]   1%|           [00:00<?]

2025-08-10 18:49:09,368 - INFO - Epoch: 39/500, Iter: 2/119 -- train_loss: 1.1805 
2025-08-10 18:49:09,921 - INFO - Epoch: 39/500, Iter: 3/119 -- train_loss: 1.1792 
2025-08-10 18:49:10,188 - INFO - Epoch: 39/500, Iter: 4/119 -- train_loss: 1.1797 
2025-08-10 18:49:10,493 - INFO - Epoch: 39/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-10 18:49:10,801 - INFO - Epoch: 39/500, Iter: 6/119 -- train_loss: 1.1813 
2025-08-10 18:49:11,084 - INFO - Epoch: 39/500, Iter: 7/119 -- train_loss: 1.1121 
2025-08-10 18:49:11,399 - INFO - Epoch: 39/500, Iter: 8/119 -- train_loss: 1.1811 
2025-08-10 18:49:11,755 - INFO - Epoch: 39/500, Iter: 9/119 -- train_loss: 1.1627 
2025-08-10 18:49:15,291 - INFO - Epoch: 39/500, Iter: 10/119 -- train_loss: 1.1650 
2025-08-10 18:49:15,581 - INFO - Epoch: 39/500, Iter: 11/119 -- train_loss: 1.0217 
2025-08-10 18:49:15,908 - INFO - Epoch: 39/500, Iter: 12/119 -- train_loss: 1.1705 
2025-08-10 18:49:16,232 - INFO - Epoch: 39/500, Iter: 13/119 -- train_loss: 1.1773 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:52:58,508 - INFO - Epoch: 40/500, Iter: 1/119 -- train_loss: 1.1784 


[1/119]   1%|           [00:00<?]

2025-08-10 18:52:58,834 - INFO - Epoch: 40/500, Iter: 2/119 -- train_loss: 1.1771 
2025-08-10 18:52:59,148 - INFO - Epoch: 40/500, Iter: 3/119 -- train_loss: 1.1685 
2025-08-10 18:52:59,475 - INFO - Epoch: 40/500, Iter: 4/119 -- train_loss: 1.1794 
2025-08-10 18:53:01,074 - INFO - Epoch: 40/500, Iter: 5/119 -- train_loss: 1.1346 
2025-08-10 18:53:01,367 - INFO - Epoch: 40/500, Iter: 6/119 -- train_loss: 1.1769 
2025-08-10 18:53:01,703 - INFO - Epoch: 40/500, Iter: 7/119 -- train_loss: 1.1779 
2025-08-10 18:53:01,979 - INFO - Epoch: 40/500, Iter: 8/119 -- train_loss: 1.1770 
2025-08-10 18:53:04,758 - INFO - Epoch: 40/500, Iter: 9/119 -- train_loss: 1.1450 
2025-08-10 18:53:05,058 - INFO - Epoch: 40/500, Iter: 10/119 -- train_loss: 1.1585 
2025-08-10 18:53:05,381 - INFO - Epoch: 40/500, Iter: 11/119 -- train_loss: 1.1700 
2025-08-10 18:53:05,702 - INFO - Epoch: 40/500, Iter: 12/119 -- train_loss: 1.1501 
2025-08-10 18:53:06,606 - INFO - Epoch: 40/500, Iter: 13/119 -- train_loss: 1.1762 


[1/20]   5%|5          [00:00<?]

2025-08-10 18:56:45,059 - INFO - Epoch: 41/500, Iter: 1/119 -- train_loss: 1.1834 


[1/119]   1%|           [00:00<?]

2025-08-10 18:56:45,343 - INFO - Epoch: 41/500, Iter: 2/119 -- train_loss: 1.1309 
2025-08-10 18:56:45,970 - INFO - Epoch: 41/500, Iter: 3/119 -- train_loss: 1.1807 
2025-08-10 18:56:46,264 - INFO - Epoch: 41/500, Iter: 4/119 -- train_loss: 1.1474 
2025-08-10 18:56:48,970 - INFO - Epoch: 41/500, Iter: 5/119 -- train_loss: 1.1645 
2025-08-10 18:56:49,323 - INFO - Epoch: 41/500, Iter: 6/119 -- train_loss: 1.1238 
2025-08-10 18:56:49,629 - INFO - Epoch: 41/500, Iter: 7/119 -- train_loss: 1.0784 
2025-08-10 18:56:49,956 - INFO - Epoch: 41/500, Iter: 8/119 -- train_loss: 1.0946 
2025-08-10 18:56:50,290 - INFO - Epoch: 41/500, Iter: 9/119 -- train_loss: 1.1764 
2025-08-10 18:56:50,566 - INFO - Epoch: 41/500, Iter: 10/119 -- train_loss: 1.1799 
2025-08-10 18:56:52,029 - INFO - Epoch: 41/500, Iter: 11/119 -- train_loss: 1.1839 
2025-08-10 18:56:53,722 - INFO - Epoch: 41/500, Iter: 12/119 -- train_loss: 1.1726 
2025-08-10 18:56:58,864 - INFO - Epoch: 41/500, Iter: 13/119 -- train_loss: 1.1760 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:00:33,040 - INFO - Epoch: 42/500, Iter: 1/119 -- train_loss: 1.1787 


[1/119]   1%|           [00:00<?]

2025-08-10 19:00:35,790 - INFO - Epoch: 42/500, Iter: 2/119 -- train_loss: 1.1795 
2025-08-10 19:00:36,519 - INFO - Epoch: 42/500, Iter: 3/119 -- train_loss: 1.1777 
2025-08-10 19:00:39,107 - INFO - Epoch: 42/500, Iter: 4/119 -- train_loss: 1.1805 
2025-08-10 19:00:39,394 - INFO - Epoch: 42/500, Iter: 5/119 -- train_loss: 1.1805 
2025-08-10 19:00:39,658 - INFO - Epoch: 42/500, Iter: 6/119 -- train_loss: 1.1668 
2025-08-10 19:00:44,345 - INFO - Epoch: 42/500, Iter: 7/119 -- train_loss: 1.1730 
2025-08-10 19:00:44,616 - INFO - Epoch: 42/500, Iter: 8/119 -- train_loss: 1.1657 
2025-08-10 19:00:44,907 - INFO - Epoch: 42/500, Iter: 9/119 -- train_loss: 1.1464 
2025-08-10 19:00:45,194 - INFO - Epoch: 42/500, Iter: 10/119 -- train_loss: 1.1661 
2025-08-10 19:00:45,543 - INFO - Epoch: 42/500, Iter: 11/119 -- train_loss: 1.1681 
2025-08-10 19:00:45,847 - INFO - Epoch: 42/500, Iter: 12/119 -- train_loss: 1.1597 
2025-08-10 19:00:46,193 - INFO - Epoch: 42/500, Iter: 13/119 -- train_loss: 1.1736 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:04:19,151 - INFO - Epoch: 43/500, Iter: 1/119 -- train_loss: 1.1754 


[1/119]   1%|           [00:00<?]

2025-08-10 19:04:19,867 - INFO - Epoch: 43/500, Iter: 2/119 -- train_loss: 1.1777 
2025-08-10 19:04:21,529 - INFO - Epoch: 43/500, Iter: 3/119 -- train_loss: 1.1769 
2025-08-10 19:04:21,814 - INFO - Epoch: 43/500, Iter: 4/119 -- train_loss: 1.1757 
2025-08-10 19:04:24,225 - INFO - Epoch: 43/500, Iter: 5/119 -- train_loss: 1.1764 
2025-08-10 19:04:24,534 - INFO - Epoch: 43/500, Iter: 6/119 -- train_loss: 1.1766 
2025-08-10 19:04:24,821 - INFO - Epoch: 43/500, Iter: 7/119 -- train_loss: 1.1759 
2025-08-10 19:04:25,107 - INFO - Epoch: 43/500, Iter: 8/119 -- train_loss: 1.1783 
2025-08-10 19:04:25,402 - INFO - Epoch: 43/500, Iter: 9/119 -- train_loss: 1.1761 
2025-08-10 19:04:26,196 - INFO - Epoch: 43/500, Iter: 10/119 -- train_loss: 1.1583 
2025-08-10 19:04:26,885 - INFO - Epoch: 43/500, Iter: 11/119 -- train_loss: 1.1616 
2025-08-10 19:04:27,424 - INFO - Epoch: 43/500, Iter: 12/119 -- train_loss: 1.1525 
2025-08-10 19:04:33,310 - INFO - Epoch: 43/500, Iter: 13/119 -- train_loss: 1.1427 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:08:13,654 - INFO - Epoch: 44/500, Iter: 1/119 -- train_loss: 1.1381 


[1/119]   1%|           [00:00<?]

2025-08-10 19:08:17,221 - INFO - Epoch: 44/500, Iter: 2/119 -- train_loss: 1.1475 
2025-08-10 19:08:17,528 - INFO - Epoch: 44/500, Iter: 3/119 -- train_loss: 1.1701 
2025-08-10 19:08:17,815 - INFO - Epoch: 44/500, Iter: 4/119 -- train_loss: 1.1775 
2025-08-10 19:08:18,154 - INFO - Epoch: 44/500, Iter: 5/119 -- train_loss: 1.1491 
2025-08-10 19:08:18,429 - INFO - Epoch: 44/500, Iter: 6/119 -- train_loss: 1.1677 
2025-08-10 19:08:23,190 - INFO - Epoch: 44/500, Iter: 7/119 -- train_loss: 1.1744 
2025-08-10 19:08:23,495 - INFO - Epoch: 44/500, Iter: 8/119 -- train_loss: 1.1094 
2025-08-10 19:08:29,071 - INFO - Epoch: 44/500, Iter: 9/119 -- train_loss: 1.1769 
2025-08-10 19:08:29,357 - INFO - Epoch: 44/500, Iter: 10/119 -- train_loss: 1.1788 
2025-08-10 19:08:29,648 - INFO - Epoch: 44/500, Iter: 11/119 -- train_loss: 1.1765 
2025-08-10 19:08:29,937 - INFO - Epoch: 44/500, Iter: 12/119 -- train_loss: 1.1758 
2025-08-10 19:08:30,238 - INFO - Epoch: 44/500, Iter: 13/119 -- train_loss: 1.1763 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:12:09,379 - INFO - Epoch: 45/500, Iter: 1/119 -- train_loss: 1.1434 


[1/119]   1%|           [00:00<?]

2025-08-10 19:12:14,328 - INFO - Epoch: 45/500, Iter: 2/119 -- train_loss: 1.1760 
2025-08-10 19:12:14,630 - INFO - Epoch: 45/500, Iter: 3/119 -- train_loss: 1.1761 
2025-08-10 19:12:14,902 - INFO - Epoch: 45/500, Iter: 4/119 -- train_loss: 1.1510 
2025-08-10 19:12:16,596 - INFO - Epoch: 45/500, Iter: 5/119 -- train_loss: 1.1711 
2025-08-10 19:12:16,883 - INFO - Epoch: 45/500, Iter: 6/119 -- train_loss: 1.1650 
2025-08-10 19:12:17,209 - INFO - Epoch: 45/500, Iter: 7/119 -- train_loss: 1.1756 
2025-08-10 19:12:17,506 - INFO - Epoch: 45/500, Iter: 8/119 -- train_loss: 1.1781 
2025-08-10 19:12:17,835 - INFO - Epoch: 45/500, Iter: 9/119 -- train_loss: 1.1676 
2025-08-10 19:12:19,033 - INFO - Epoch: 45/500, Iter: 10/119 -- train_loss: 1.1552 
2025-08-10 19:12:19,342 - INFO - Epoch: 45/500, Iter: 11/119 -- train_loss: 1.1774 
2025-08-10 19:12:19,669 - INFO - Epoch: 45/500, Iter: 12/119 -- train_loss: 1.1504 
2025-08-10 19:12:27,177 - INFO - Epoch: 45/500, Iter: 13/119 -- train_loss: 1.1308 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:16:09,483 - INFO - Epoch: 46/500, Iter: 1/119 -- train_loss: 1.1464 


[1/119]   1%|           [00:00<?]

2025-08-10 19:16:09,769 - INFO - Epoch: 46/500, Iter: 2/119 -- train_loss: 1.1513 
2025-08-10 19:16:10,072 - INFO - Epoch: 46/500, Iter: 3/119 -- train_loss: 1.1780 
2025-08-10 19:16:12,286 - INFO - Epoch: 46/500, Iter: 4/119 -- train_loss: 1.1755 
2025-08-10 19:16:12,580 - INFO - Epoch: 46/500, Iter: 5/119 -- train_loss: 1.1699 
2025-08-10 19:16:12,874 - INFO - Epoch: 46/500, Iter: 6/119 -- train_loss: 1.1592 
2025-08-10 19:16:13,180 - INFO - Epoch: 46/500, Iter: 7/119 -- train_loss: 1.1791 
2025-08-10 19:16:13,493 - INFO - Epoch: 46/500, Iter: 8/119 -- train_loss: 1.1523 
2025-08-10 19:16:15,176 - INFO - Epoch: 46/500, Iter: 9/119 -- train_loss: 1.1793 
2025-08-10 19:16:15,479 - INFO - Epoch: 46/500, Iter: 10/119 -- train_loss: 1.1826 
2025-08-10 19:16:15,817 - INFO - Epoch: 46/500, Iter: 11/119 -- train_loss: 1.1750 
2025-08-10 19:16:18,210 - INFO - Epoch: 46/500, Iter: 12/119 -- train_loss: 1.0797 
2025-08-10 19:16:18,513 - INFO - Epoch: 46/500, Iter: 13/119 -- train_loss: 1.1696 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:19:51,086 - INFO - Epoch: 47/500, Iter: 1/119 -- train_loss: 1.1629 


[1/119]   1%|           [00:00<?]

2025-08-10 19:19:55,575 - INFO - Epoch: 47/500, Iter: 2/119 -- train_loss: 1.1705 
2025-08-10 19:19:55,840 - INFO - Epoch: 47/500, Iter: 3/119 -- train_loss: 1.1758 
2025-08-10 19:19:56,131 - INFO - Epoch: 47/500, Iter: 4/119 -- train_loss: 1.1639 
2025-08-10 19:19:56,453 - INFO - Epoch: 47/500, Iter: 5/119 -- train_loss: 1.1755 
2025-08-10 19:19:56,747 - INFO - Epoch: 47/500, Iter: 6/119 -- train_loss: 1.1527 
2025-08-10 19:19:57,047 - INFO - Epoch: 47/500, Iter: 7/119 -- train_loss: 1.1773 
2025-08-10 19:19:57,342 - INFO - Epoch: 47/500, Iter: 8/119 -- train_loss: 1.1545 
2025-08-10 19:19:57,671 - INFO - Epoch: 47/500, Iter: 9/119 -- train_loss: 1.1769 
2025-08-10 19:20:07,408 - INFO - Epoch: 47/500, Iter: 10/119 -- train_loss: 1.1704 
2025-08-10 19:20:07,680 - INFO - Epoch: 47/500, Iter: 11/119 -- train_loss: 1.1483 
2025-08-10 19:20:07,980 - INFO - Epoch: 47/500, Iter: 12/119 -- train_loss: 1.1652 
2025-08-10 19:20:08,280 - INFO - Epoch: 47/500, Iter: 13/119 -- train_loss: 1.1753 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:23:52,968 - INFO - Epoch: 48/500, Iter: 1/119 -- train_loss: 1.1784 


[1/119]   1%|           [00:00<?]

2025-08-10 19:23:53,504 - INFO - Epoch: 48/500, Iter: 2/119 -- train_loss: 1.1502 
2025-08-10 19:23:53,775 - INFO - Epoch: 48/500, Iter: 3/119 -- train_loss: 1.1776 
2025-08-10 19:23:54,065 - INFO - Epoch: 48/500, Iter: 4/119 -- train_loss: 1.1755 
2025-08-10 19:23:54,318 - INFO - Epoch: 48/500, Iter: 5/119 -- train_loss: 1.1496 
2025-08-10 19:23:54,624 - INFO - Epoch: 48/500, Iter: 6/119 -- train_loss: 1.1760 
2025-08-10 19:23:54,952 - INFO - Epoch: 48/500, Iter: 7/119 -- train_loss: 1.1793 
2025-08-10 19:23:55,260 - INFO - Epoch: 48/500, Iter: 8/119 -- train_loss: 1.1418 
2025-08-10 19:23:57,282 - INFO - Epoch: 48/500, Iter: 9/119 -- train_loss: 1.1358 
2025-08-10 19:23:59,655 - INFO - Epoch: 48/500, Iter: 10/119 -- train_loss: 1.1629 
2025-08-10 19:23:59,955 - INFO - Epoch: 48/500, Iter: 11/119 -- train_loss: 1.1482 
2025-08-10 19:24:00,237 - INFO - Epoch: 48/500, Iter: 12/119 -- train_loss: 1.1769 
2025-08-10 19:24:00,553 - INFO - Epoch: 48/500, Iter: 13/119 -- train_loss: 1.1751 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:27:30,281 - INFO - Epoch: 49/500, Iter: 1/119 -- train_loss: 1.1634 


[1/119]   1%|           [00:00<?]

2025-08-10 19:27:35,041 - INFO - Epoch: 49/500, Iter: 2/119 -- train_loss: 1.1758 
2025-08-10 19:27:35,355 - INFO - Epoch: 49/500, Iter: 3/119 -- train_loss: 1.1751 
2025-08-10 19:27:35,632 - INFO - Epoch: 49/500, Iter: 4/119 -- train_loss: 1.1011 
2025-08-10 19:27:35,918 - INFO - Epoch: 49/500, Iter: 5/119 -- train_loss: 1.1701 
2025-08-10 19:27:36,228 - INFO - Epoch: 49/500, Iter: 6/119 -- train_loss: 1.1451 
2025-08-10 19:27:36,514 - INFO - Epoch: 49/500, Iter: 7/119 -- train_loss: 1.1580 
2025-08-10 19:27:36,809 - INFO - Epoch: 49/500, Iter: 8/119 -- train_loss: 1.1633 
2025-08-10 19:27:38,758 - INFO - Epoch: 49/500, Iter: 9/119 -- train_loss: 1.1753 
2025-08-10 19:27:39,289 - INFO - Epoch: 49/500, Iter: 10/119 -- train_loss: 1.1713 
2025-08-10 19:27:39,569 - INFO - Epoch: 49/500, Iter: 11/119 -- train_loss: 1.1653 
2025-08-10 19:27:39,889 - INFO - Epoch: 49/500, Iter: 12/119 -- train_loss: 1.1660 
2025-08-10 19:27:40,138 - INFO - Epoch: 49/500, Iter: 13/119 -- train_loss: 1.1721 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:31:09,033 - INFO - Epoch: 50/500, Iter: 1/119 -- train_loss: 1.1757 


[1/119]   1%|           [00:00<?]

2025-08-10 19:31:09,348 - INFO - Epoch: 50/500, Iter: 2/119 -- train_loss: 1.1750 
2025-08-10 19:31:15,835 - INFO - Epoch: 50/500, Iter: 3/119 -- train_loss: 1.1753 
2025-08-10 19:31:16,156 - INFO - Epoch: 50/500, Iter: 4/119 -- train_loss: 1.1759 
2025-08-10 19:31:16,430 - INFO - Epoch: 50/500, Iter: 5/119 -- train_loss: 1.1732 
2025-08-10 19:31:16,725 - INFO - Epoch: 50/500, Iter: 6/119 -- train_loss: 1.1594 
2025-08-10 19:31:17,056 - INFO - Epoch: 50/500, Iter: 7/119 -- train_loss: 1.0556 
2025-08-10 19:31:17,378 - INFO - Epoch: 50/500, Iter: 8/119 -- train_loss: 1.1451 
2025-08-10 19:31:17,670 - INFO - Epoch: 50/500, Iter: 9/119 -- train_loss: 1.1336 
2025-08-10 19:31:18,003 - INFO - Epoch: 50/500, Iter: 10/119 -- train_loss: 1.1706 
2025-08-10 19:31:23,016 - INFO - Epoch: 50/500, Iter: 11/119 -- train_loss: 1.1698 
2025-08-10 19:31:23,345 - INFO - Epoch: 50/500, Iter: 12/119 -- train_loss: 1.1761 
2025-08-10 19:31:23,666 - INFO - Epoch: 50/500, Iter: 13/119 -- train_loss: 1.1768 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:34:51,536 - INFO - Epoch: 51/500, Iter: 1/119 -- train_loss: 1.1767 


[1/119]   1%|           [00:00<?]

2025-08-10 19:34:57,376 - INFO - Epoch: 51/500, Iter: 2/119 -- train_loss: 1.1358 
2025-08-10 19:35:00,877 - INFO - Epoch: 51/500, Iter: 3/119 -- train_loss: 1.1631 
2025-08-10 19:35:01,158 - INFO - Epoch: 51/500, Iter: 4/119 -- train_loss: 1.1785 
2025-08-10 19:35:01,434 - INFO - Epoch: 51/500, Iter: 5/119 -- train_loss: 1.1790 
2025-08-10 19:35:01,715 - INFO - Epoch: 51/500, Iter: 6/119 -- train_loss: 1.1747 
2025-08-10 19:35:02,008 - INFO - Epoch: 51/500, Iter: 7/119 -- train_loss: 1.1739 
2025-08-10 19:35:02,324 - INFO - Epoch: 51/500, Iter: 8/119 -- train_loss: 1.1492 
2025-08-10 19:35:02,624 - INFO - Epoch: 51/500, Iter: 9/119 -- train_loss: 1.1660 
2025-08-10 19:35:02,940 - INFO - Epoch: 51/500, Iter: 10/119 -- train_loss: 1.1752 
2025-08-10 19:35:06,291 - INFO - Epoch: 51/500, Iter: 11/119 -- train_loss: 1.1756 
2025-08-10 19:35:06,595 - INFO - Epoch: 51/500, Iter: 12/119 -- train_loss: 0.9270 
2025-08-10 19:35:06,912 - INFO - Epoch: 51/500, Iter: 13/119 -- train_loss: 0.9699 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:38:55,088 - INFO - Epoch: 52/500, Iter: 1/119 -- train_loss: 1.1716 


[1/119]   1%|           [00:00<?]

2025-08-10 19:38:55,385 - INFO - Epoch: 52/500, Iter: 2/119 -- train_loss: 1.1765 
2025-08-10 19:38:55,693 - INFO - Epoch: 52/500, Iter: 3/119 -- train_loss: 1.1409 
2025-08-10 19:38:55,998 - INFO - Epoch: 52/500, Iter: 4/119 -- train_loss: 1.1464 
2025-08-10 19:38:56,316 - INFO - Epoch: 52/500, Iter: 5/119 -- train_loss: 1.1807 
2025-08-10 19:38:56,631 - INFO - Epoch: 52/500, Iter: 6/119 -- train_loss: 1.1750 
2025-08-10 19:38:56,932 - INFO - Epoch: 52/500, Iter: 7/119 -- train_loss: 1.1550 
2025-08-10 19:38:57,263 - INFO - Epoch: 52/500, Iter: 8/119 -- train_loss: 1.1473 
2025-08-10 19:39:07,884 - INFO - Epoch: 52/500, Iter: 9/119 -- train_loss: 1.1659 
2025-08-10 19:39:08,129 - INFO - Epoch: 52/500, Iter: 10/119 -- train_loss: 1.1812 
2025-08-10 19:39:08,396 - INFO - Epoch: 52/500, Iter: 11/119 -- train_loss: 1.1781 
2025-08-10 19:39:08,697 - INFO - Epoch: 52/500, Iter: 12/119 -- train_loss: 1.1755 
2025-08-10 19:39:08,998 - INFO - Epoch: 52/500, Iter: 13/119 -- train_loss: 1.1777 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:42:48,618 - INFO - Epoch: 53/500, Iter: 1/119 -- train_loss: 1.1796 


[1/119]   1%|           [00:00<?]

2025-08-10 19:42:48,907 - INFO - Epoch: 53/500, Iter: 2/119 -- train_loss: 1.1562 
2025-08-10 19:42:49,224 - INFO - Epoch: 53/500, Iter: 3/119 -- train_loss: 1.1216 
2025-08-10 19:42:49,489 - INFO - Epoch: 53/500, Iter: 4/119 -- train_loss: 1.1416 
2025-08-10 19:42:49,801 - INFO - Epoch: 53/500, Iter: 5/119 -- train_loss: 1.1645 
2025-08-10 19:42:50,106 - INFO - Epoch: 53/500, Iter: 6/119 -- train_loss: 1.1753 
2025-08-10 19:42:50,416 - INFO - Epoch: 53/500, Iter: 7/119 -- train_loss: 1.1248 
2025-08-10 19:42:50,728 - INFO - Epoch: 53/500, Iter: 8/119 -- train_loss: 1.1613 
2025-08-10 19:42:58,229 - INFO - Epoch: 53/500, Iter: 9/119 -- train_loss: 1.1313 
2025-08-10 19:42:58,504 - INFO - Epoch: 53/500, Iter: 10/119 -- train_loss: 1.1759 
2025-08-10 19:42:58,801 - INFO - Epoch: 53/500, Iter: 11/119 -- train_loss: 1.1634 
2025-08-10 19:42:59,099 - INFO - Epoch: 53/500, Iter: 12/119 -- train_loss: 1.1818 
2025-08-10 19:42:59,400 - INFO - Epoch: 53/500, Iter: 13/119 -- train_loss: 1.1337 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:46:38,536 - INFO - Epoch: 54/500, Iter: 1/119 -- train_loss: 1.1794 


[1/119]   1%|           [00:00<?]

2025-08-10 19:46:38,863 - INFO - Epoch: 54/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-10 19:46:39,194 - INFO - Epoch: 54/500, Iter: 3/119 -- train_loss: 1.1810 
2025-08-10 19:46:39,515 - INFO - Epoch: 54/500, Iter: 4/119 -- train_loss: 1.1448 
2025-08-10 19:46:39,855 - INFO - Epoch: 54/500, Iter: 5/119 -- train_loss: 1.1807 
2025-08-10 19:46:40,165 - INFO - Epoch: 54/500, Iter: 6/119 -- train_loss: 1.1793 
2025-08-10 19:46:40,512 - INFO - Epoch: 54/500, Iter: 7/119 -- train_loss: 1.1690 
2025-08-10 19:46:40,832 - INFO - Epoch: 54/500, Iter: 8/119 -- train_loss: 1.1468 
2025-08-10 19:46:49,995 - INFO - Epoch: 54/500, Iter: 9/119 -- train_loss: 1.0512 
2025-08-10 19:46:50,315 - INFO - Epoch: 54/500, Iter: 10/119 -- train_loss: 1.1802 
2025-08-10 19:46:50,606 - INFO - Epoch: 54/500, Iter: 11/119 -- train_loss: 1.1763 
2025-08-10 19:46:50,922 - INFO - Epoch: 54/500, Iter: 12/119 -- train_loss: 1.1795 
2025-08-10 19:46:51,241 - INFO - Epoch: 54/500, Iter: 13/119 -- train_loss: 1.1669 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:50:31,670 - INFO - Epoch: 55/500, Iter: 1/119 -- train_loss: 1.1753 


[1/119]   1%|           [00:00<?]

2025-08-10 19:50:31,970 - INFO - Epoch: 55/500, Iter: 2/119 -- train_loss: 1.1493 
2025-08-10 19:50:32,264 - INFO - Epoch: 55/500, Iter: 3/119 -- train_loss: 1.1640 
2025-08-10 19:50:35,818 - INFO - Epoch: 55/500, Iter: 4/119 -- train_loss: 1.1681 
2025-08-10 19:50:36,101 - INFO - Epoch: 55/500, Iter: 5/119 -- train_loss: 1.1768 
2025-08-10 19:50:36,380 - INFO - Epoch: 55/500, Iter: 6/119 -- train_loss: 1.1180 
2025-08-10 19:50:36,673 - INFO - Epoch: 55/500, Iter: 7/119 -- train_loss: 1.1759 
2025-08-10 19:50:37,025 - INFO - Epoch: 55/500, Iter: 8/119 -- train_loss: 1.1141 
2025-08-10 19:50:42,888 - INFO - Epoch: 55/500, Iter: 9/119 -- train_loss: 1.1749 
2025-08-10 19:50:43,206 - INFO - Epoch: 55/500, Iter: 10/119 -- train_loss: 1.1740 
2025-08-10 19:50:43,488 - INFO - Epoch: 55/500, Iter: 11/119 -- train_loss: 1.1759 
2025-08-10 19:50:43,777 - INFO - Epoch: 55/500, Iter: 12/119 -- train_loss: 1.1254 
2025-08-10 19:50:44,057 - INFO - Epoch: 55/500, Iter: 13/119 -- train_loss: 1.1780 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:54:23,428 - INFO - Epoch: 56/500, Iter: 1/119 -- train_loss: 1.1769 


[1/119]   1%|           [00:00<?]

2025-08-10 19:54:26,732 - INFO - Epoch: 56/500, Iter: 2/119 -- train_loss: 1.1755 
2025-08-10 19:54:26,997 - INFO - Epoch: 56/500, Iter: 3/119 -- train_loss: 1.1413 
2025-08-10 19:54:27,310 - INFO - Epoch: 56/500, Iter: 4/119 -- train_loss: 1.1747 
2025-08-10 19:54:27,598 - INFO - Epoch: 56/500, Iter: 5/119 -- train_loss: 1.1647 
2025-08-10 19:54:27,878 - INFO - Epoch: 56/500, Iter: 6/119 -- train_loss: 1.1747 
2025-08-10 19:54:28,204 - INFO - Epoch: 56/500, Iter: 7/119 -- train_loss: 1.1750 
2025-08-10 19:54:28,505 - INFO - Epoch: 56/500, Iter: 8/119 -- train_loss: 1.1750 
2025-08-10 19:54:30,691 - INFO - Epoch: 56/500, Iter: 9/119 -- train_loss: 1.1777 
2025-08-10 19:54:33,096 - INFO - Epoch: 56/500, Iter: 10/119 -- train_loss: 1.1625 
2025-08-10 19:54:38,247 - INFO - Epoch: 56/500, Iter: 11/119 -- train_loss: 1.1751 
2025-08-10 19:54:38,531 - INFO - Epoch: 56/500, Iter: 12/119 -- train_loss: 1.1703 
2025-08-10 19:54:38,826 - INFO - Epoch: 56/500, Iter: 13/119 -- train_loss: 1.1742 


[1/20]   5%|5          [00:00<?]

2025-08-10 19:58:09,788 - INFO - Epoch: 57/500, Iter: 1/119 -- train_loss: 1.1811 


[1/119]   1%|           [00:00<?]

2025-08-10 19:58:12,546 - INFO - Epoch: 57/500, Iter: 2/119 -- train_loss: 1.1643 
2025-08-10 19:58:12,837 - INFO - Epoch: 57/500, Iter: 3/119 -- train_loss: 1.1855 
2025-08-10 19:58:14,595 - INFO - Epoch: 57/500, Iter: 4/119 -- train_loss: 1.1842 
2025-08-10 19:58:14,865 - INFO - Epoch: 57/500, Iter: 5/119 -- train_loss: 1.1845 
2025-08-10 19:58:15,145 - INFO - Epoch: 57/500, Iter: 6/119 -- train_loss: 1.1697 
2025-08-10 19:58:15,413 - INFO - Epoch: 57/500, Iter: 7/119 -- train_loss: 1.1613 
2025-08-10 19:58:15,749 - INFO - Epoch: 57/500, Iter: 8/119 -- train_loss: 1.1757 
2025-08-10 19:58:16,039 - INFO - Epoch: 57/500, Iter: 9/119 -- train_loss: 1.1839 
2025-08-10 19:58:28,951 - INFO - Epoch: 57/500, Iter: 10/119 -- train_loss: 1.1752 
2025-08-10 19:58:29,218 - INFO - Epoch: 57/500, Iter: 11/119 -- train_loss: 1.1764 
2025-08-10 19:58:29,501 - INFO - Epoch: 57/500, Iter: 12/119 -- train_loss: 1.1440 
2025-08-10 19:58:29,801 - INFO - Epoch: 57/500, Iter: 13/119 -- train_loss: 1.1608 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:02:19,314 - INFO - Epoch: 58/500, Iter: 1/119 -- train_loss: 1.0477 


[1/119]   1%|           [00:00<?]

2025-08-10 20:02:19,623 - INFO - Epoch: 58/500, Iter: 2/119 -- train_loss: 1.1756 
2025-08-10 20:02:19,911 - INFO - Epoch: 58/500, Iter: 3/119 -- train_loss: 1.1489 
2025-08-10 20:02:20,214 - INFO - Epoch: 58/500, Iter: 4/119 -- train_loss: 1.1751 
2025-08-10 20:02:20,531 - INFO - Epoch: 58/500, Iter: 5/119 -- train_loss: 1.1750 
2025-08-10 20:02:20,857 - INFO - Epoch: 58/500, Iter: 6/119 -- train_loss: 1.1692 
2025-08-10 20:02:21,162 - INFO - Epoch: 58/500, Iter: 7/119 -- train_loss: 1.1761 
2025-08-10 20:02:21,462 - INFO - Epoch: 58/500, Iter: 8/119 -- train_loss: 1.1748 
2025-08-10 20:02:31,293 - INFO - Epoch: 58/500, Iter: 9/119 -- train_loss: 1.1692 
2025-08-10 20:02:31,606 - INFO - Epoch: 58/500, Iter: 10/119 -- train_loss: 1.1857 
2025-08-10 20:02:31,922 - INFO - Epoch: 58/500, Iter: 11/119 -- train_loss: 1.1660 
2025-08-10 20:02:32,251 - INFO - Epoch: 58/500, Iter: 12/119 -- train_loss: 1.1769 
2025-08-10 20:02:32,509 - INFO - Epoch: 58/500, Iter: 13/119 -- train_loss: 1.1723 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:06:13,834 - INFO - Epoch: 59/500, Iter: 1/119 -- train_loss: 1.1762 


[1/119]   1%|           [00:00<?]

2025-08-10 20:06:14,104 - INFO - Epoch: 59/500, Iter: 2/119 -- train_loss: 1.1628 
2025-08-10 20:06:14,419 - INFO - Epoch: 59/500, Iter: 3/119 -- train_loss: 1.1628 
2025-08-10 20:06:14,730 - INFO - Epoch: 59/500, Iter: 4/119 -- train_loss: 1.1173 
2025-08-10 20:06:15,017 - INFO - Epoch: 59/500, Iter: 5/119 -- train_loss: 1.1311 
2025-08-10 20:06:15,330 - INFO - Epoch: 59/500, Iter: 6/119 -- train_loss: 1.1770 
2025-08-10 20:06:15,995 - INFO - Epoch: 59/500, Iter: 7/119 -- train_loss: 1.1676 
2025-08-10 20:06:16,336 - INFO - Epoch: 59/500, Iter: 8/119 -- train_loss: 1.1759 
2025-08-10 20:06:20,987 - INFO - Epoch: 59/500, Iter: 9/119 -- train_loss: 1.1730 
2025-08-10 20:06:21,254 - INFO - Epoch: 59/500, Iter: 10/119 -- train_loss: 1.1698 
2025-08-10 20:06:21,596 - INFO - Epoch: 59/500, Iter: 11/119 -- train_loss: 1.1021 
2025-08-10 20:06:21,907 - INFO - Epoch: 59/500, Iter: 12/119 -- train_loss: 1.1777 
2025-08-10 20:06:22,239 - INFO - Epoch: 59/500, Iter: 13/119 -- train_loss: 1.1281 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:10:11,183 - INFO - Epoch: 60/500, Iter: 1/119 -- train_loss: 1.1490 


[1/119]   1%|           [00:00<?]

2025-08-10 20:10:11,499 - INFO - Epoch: 60/500, Iter: 2/119 -- train_loss: 1.1747 
2025-08-10 20:10:11,771 - INFO - Epoch: 60/500, Iter: 3/119 -- train_loss: 1.1778 
2025-08-10 20:10:12,094 - INFO - Epoch: 60/500, Iter: 4/119 -- train_loss: 1.1785 
2025-08-10 20:10:14,687 - INFO - Epoch: 60/500, Iter: 5/119 -- train_loss: 1.1754 
2025-08-10 20:10:15,008 - INFO - Epoch: 60/500, Iter: 6/119 -- train_loss: 1.1768 
2025-08-10 20:10:15,297 - INFO - Epoch: 60/500, Iter: 7/119 -- train_loss: 1.1798 
2025-08-10 20:10:15,604 - INFO - Epoch: 60/500, Iter: 8/119 -- train_loss: 1.1799 
2025-08-10 20:10:15,926 - INFO - Epoch: 60/500, Iter: 9/119 -- train_loss: 1.1199 
2025-08-10 20:10:16,873 - INFO - Epoch: 60/500, Iter: 10/119 -- train_loss: 1.0132 
2025-08-10 20:10:17,192 - INFO - Epoch: 60/500, Iter: 11/119 -- train_loss: 1.1787 
2025-08-10 20:10:23,163 - INFO - Epoch: 60/500, Iter: 12/119 -- train_loss: 1.1394 
2025-08-10 20:10:23,451 - INFO - Epoch: 60/500, Iter: 13/119 -- train_loss: 1.1535 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:13:55,310 - INFO - Epoch: 61/500, Iter: 1/119 -- train_loss: 1.1760 


[1/119]   1%|           [00:00<?]

2025-08-10 20:13:55,618 - INFO - Epoch: 61/500, Iter: 2/119 -- train_loss: 1.1747 
2025-08-10 20:13:55,934 - INFO - Epoch: 61/500, Iter: 3/119 -- train_loss: 1.1078 
2025-08-10 20:13:56,218 - INFO - Epoch: 61/500, Iter: 4/119 -- train_loss: 1.1777 
2025-08-10 20:13:56,507 - INFO - Epoch: 61/500, Iter: 5/119 -- train_loss: 1.1492 
2025-08-10 20:13:56,825 - INFO - Epoch: 61/500, Iter: 6/119 -- train_loss: 1.1544 
2025-08-10 20:13:57,124 - INFO - Epoch: 61/500, Iter: 7/119 -- train_loss: 1.1783 
2025-08-10 20:13:57,453 - INFO - Epoch: 61/500, Iter: 8/119 -- train_loss: 1.1793 
2025-08-10 20:14:00,196 - INFO - Epoch: 61/500, Iter: 9/119 -- train_loss: 1.1782 
2025-08-10 20:14:02,830 - INFO - Epoch: 61/500, Iter: 10/119 -- train_loss: 1.1808 
2025-08-10 20:14:03,112 - INFO - Epoch: 61/500, Iter: 11/119 -- train_loss: 1.1752 
2025-08-10 20:14:03,410 - INFO - Epoch: 61/500, Iter: 12/119 -- train_loss: 1.1725 
2025-08-10 20:14:03,705 - INFO - Epoch: 61/500, Iter: 13/119 -- train_loss: 1.1652 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:17:38,787 - INFO - Epoch: 62/500, Iter: 1/119 -- train_loss: 1.1650 


[1/119]   1%|           [00:00<?]

2025-08-10 20:17:39,092 - INFO - Epoch: 62/500, Iter: 2/119 -- train_loss: 1.1760 
2025-08-10 20:17:39,400 - INFO - Epoch: 62/500, Iter: 3/119 -- train_loss: 1.1757 
2025-08-10 20:17:39,726 - INFO - Epoch: 62/500, Iter: 4/119 -- train_loss: 1.1581 
2025-08-10 20:17:46,777 - INFO - Epoch: 62/500, Iter: 5/119 -- train_loss: 1.1689 
2025-08-10 20:17:47,057 - INFO - Epoch: 62/500, Iter: 6/119 -- train_loss: 1.1750 
2025-08-10 20:17:47,361 - INFO - Epoch: 62/500, Iter: 7/119 -- train_loss: 1.1748 
2025-08-10 20:17:47,677 - INFO - Epoch: 62/500, Iter: 8/119 -- train_loss: 1.1656 
2025-08-10 20:17:49,757 - INFO - Epoch: 62/500, Iter: 9/119 -- train_loss: 1.1764 
2025-08-10 20:17:50,088 - INFO - Epoch: 62/500, Iter: 10/119 -- train_loss: 1.1768 
2025-08-10 20:17:50,371 - INFO - Epoch: 62/500, Iter: 11/119 -- train_loss: 1.1628 
2025-08-10 20:17:50,696 - INFO - Epoch: 62/500, Iter: 12/119 -- train_loss: 1.1747 
2025-08-10 20:17:51,459 - INFO - Epoch: 62/500, Iter: 13/119 -- train_loss: 1.1562 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:21:18,774 - INFO - Epoch: 63/500, Iter: 1/119 -- train_loss: 1.1857 


[1/119]   1%|           [00:00<?]

2025-08-10 20:21:19,398 - INFO - Epoch: 63/500, Iter: 2/119 -- train_loss: 1.1633 
2025-08-10 20:21:19,679 - INFO - Epoch: 63/500, Iter: 3/119 -- train_loss: 1.1690 
2025-08-10 20:21:19,984 - INFO - Epoch: 63/500, Iter: 4/119 -- train_loss: 1.1659 
2025-08-10 20:21:26,178 - INFO - Epoch: 63/500, Iter: 5/119 -- train_loss: 1.1357 
2025-08-10 20:21:26,451 - INFO - Epoch: 63/500, Iter: 6/119 -- train_loss: 1.1806 
2025-08-10 20:21:26,732 - INFO - Epoch: 63/500, Iter: 7/119 -- train_loss: 1.1812 
2025-08-10 20:21:27,056 - INFO - Epoch: 63/500, Iter: 8/119 -- train_loss: 1.1443 
2025-08-10 20:21:28,396 - INFO - Epoch: 63/500, Iter: 9/119 -- train_loss: 1.1592 
2025-08-10 20:21:28,665 - INFO - Epoch: 63/500, Iter: 10/119 -- train_loss: 1.1746 
2025-08-10 20:21:28,990 - INFO - Epoch: 63/500, Iter: 11/119 -- train_loss: 1.1766 
2025-08-10 20:21:29,267 - INFO - Epoch: 63/500, Iter: 12/119 -- train_loss: 1.1753 
2025-08-10 20:21:37,964 - INFO - Epoch: 63/500, Iter: 13/119 -- train_loss: 1.1749 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:25:03,903 - INFO - Epoch: 64/500, Iter: 1/119 -- train_loss: 1.1658 


[1/119]   1%|           [00:00<?]

2025-08-10 20:25:10,492 - INFO - Epoch: 64/500, Iter: 2/119 -- train_loss: 1.1751 
2025-08-10 20:25:10,824 - INFO - Epoch: 64/500, Iter: 3/119 -- train_loss: 1.1642 
2025-08-10 20:25:11,136 - INFO - Epoch: 64/500, Iter: 4/119 -- train_loss: 1.1802 
2025-08-10 20:25:11,425 - INFO - Epoch: 64/500, Iter: 5/119 -- train_loss: 1.1450 
2025-08-10 20:25:11,729 - INFO - Epoch: 64/500, Iter: 6/119 -- train_loss: 1.1778 
2025-08-10 20:25:12,045 - INFO - Epoch: 64/500, Iter: 7/119 -- train_loss: 1.1842 
2025-08-10 20:25:12,372 - INFO - Epoch: 64/500, Iter: 8/119 -- train_loss: 1.1782 
2025-08-10 20:25:12,693 - INFO - Epoch: 64/500, Iter: 9/119 -- train_loss: 1.1766 
2025-08-10 20:25:23,042 - INFO - Epoch: 64/500, Iter: 10/119 -- train_loss: 1.1770 
2025-08-10 20:25:23,310 - INFO - Epoch: 64/500, Iter: 11/119 -- train_loss: 1.1755 
2025-08-10 20:25:23,566 - INFO - Epoch: 64/500, Iter: 12/119 -- train_loss: 1.1749 
2025-08-10 20:25:23,839 - INFO - Epoch: 64/500, Iter: 13/119 -- train_loss: 1.1475 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:29:03,429 - INFO - Epoch: 65/500, Iter: 1/119 -- train_loss: 1.1748 


[1/119]   1%|           [00:00<?]

2025-08-10 20:29:03,740 - INFO - Epoch: 65/500, Iter: 2/119 -- train_loss: 1.1447 
2025-08-10 20:29:04,044 - INFO - Epoch: 65/500, Iter: 3/119 -- train_loss: 1.1649 
2025-08-10 20:29:04,336 - INFO - Epoch: 65/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-10 20:29:04,624 - INFO - Epoch: 65/500, Iter: 5/119 -- train_loss: 1.1769 
2025-08-10 20:29:04,902 - INFO - Epoch: 65/500, Iter: 6/119 -- train_loss: 1.1466 
2025-08-10 20:29:05,238 - INFO - Epoch: 65/500, Iter: 7/119 -- train_loss: 1.1778 
2025-08-10 20:29:05,548 - INFO - Epoch: 65/500, Iter: 8/119 -- train_loss: 1.1747 
2025-08-10 20:29:16,009 - INFO - Epoch: 65/500, Iter: 9/119 -- train_loss: 1.1696 
2025-08-10 20:29:16,313 - INFO - Epoch: 65/500, Iter: 10/119 -- train_loss: 1.1704 
2025-08-10 20:29:16,612 - INFO - Epoch: 65/500, Iter: 11/119 -- train_loss: 1.1745 
2025-08-10 20:29:16,898 - INFO - Epoch: 65/500, Iter: 12/119 -- train_loss: 1.1753 
2025-08-10 20:29:17,201 - INFO - Epoch: 65/500, Iter: 13/119 -- train_loss: 1.1751 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:33:05,522 - INFO - Epoch: 66/500, Iter: 1/119 -- train_loss: 1.1744 


[1/119]   1%|           [00:00<?]

2025-08-10 20:33:05,817 - INFO - Epoch: 66/500, Iter: 2/119 -- train_loss: 1.1677 
2025-08-10 20:33:06,788 - INFO - Epoch: 66/500, Iter: 3/119 -- train_loss: 1.1747 
2025-08-10 20:33:07,090 - INFO - Epoch: 66/500, Iter: 4/119 -- train_loss: 1.1639 
2025-08-10 20:33:07,439 - INFO - Epoch: 66/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-10 20:33:07,759 - INFO - Epoch: 66/500, Iter: 6/119 -- train_loss: 1.1753 
2025-08-10 20:33:08,032 - INFO - Epoch: 66/500, Iter: 7/119 -- train_loss: 1.1501 
2025-08-10 20:33:08,354 - INFO - Epoch: 66/500, Iter: 8/119 -- train_loss: 1.1759 
2025-08-10 20:33:12,056 - INFO - Epoch: 66/500, Iter: 9/119 -- train_loss: 1.1558 
2025-08-10 20:33:12,384 - INFO - Epoch: 66/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-10 20:33:12,675 - INFO - Epoch: 66/500, Iter: 11/119 -- train_loss: 1.1747 
2025-08-10 20:33:12,956 - INFO - Epoch: 66/500, Iter: 12/119 -- train_loss: 1.1636 
2025-08-10 20:33:13,277 - INFO - Epoch: 66/500, Iter: 13/119 -- train_loss: 1.1745 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:36:50,072 - INFO - Epoch: 67/500, Iter: 1/119 -- train_loss: 1.1697 


[1/119]   1%|           [00:00<?]

2025-08-10 20:36:50,359 - INFO - Epoch: 67/500, Iter: 2/119 -- train_loss: 1.1129 
2025-08-10 20:36:50,605 - INFO - Epoch: 67/500, Iter: 3/119 -- train_loss: 1.1746 
2025-08-10 20:36:50,888 - INFO - Epoch: 67/500, Iter: 4/119 -- train_loss: 1.1785 
2025-08-10 20:36:51,192 - INFO - Epoch: 67/500, Iter: 5/119 -- train_loss: 1.1481 
2025-08-10 20:36:51,492 - INFO - Epoch: 67/500, Iter: 6/119 -- train_loss: 1.1762 
2025-08-10 20:36:51,766 - INFO - Epoch: 67/500, Iter: 7/119 -- train_loss: 1.1275 
2025-08-10 20:36:52,091 - INFO - Epoch: 67/500, Iter: 8/119 -- train_loss: 1.1411 
2025-08-10 20:36:54,125 - INFO - Epoch: 67/500, Iter: 9/119 -- train_loss: 0.9373 
2025-08-10 20:36:54,455 - INFO - Epoch: 67/500, Iter: 10/119 -- train_loss: 1.1817 
2025-08-10 20:36:54,770 - INFO - Epoch: 67/500, Iter: 11/119 -- train_loss: 1.1755 
2025-08-10 20:36:55,096 - INFO - Epoch: 67/500, Iter: 12/119 -- train_loss: 1.1674 
2025-08-10 20:36:55,380 - INFO - Epoch: 67/500, Iter: 13/119 -- train_loss: 1.1648 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:40:41,428 - INFO - Epoch: 68/500, Iter: 1/119 -- train_loss: 1.1756 


[1/119]   1%|           [00:00<?]

2025-08-10 20:40:41,745 - INFO - Epoch: 68/500, Iter: 2/119 -- train_loss: 1.1828 
2025-08-10 20:40:42,320 - INFO - Epoch: 68/500, Iter: 3/119 -- train_loss: 1.1779 
2025-08-10 20:40:42,606 - INFO - Epoch: 68/500, Iter: 4/119 -- train_loss: 1.1709 
2025-08-10 20:40:49,278 - INFO - Epoch: 68/500, Iter: 5/119 -- train_loss: 1.1754 
2025-08-10 20:40:49,590 - INFO - Epoch: 68/500, Iter: 6/119 -- train_loss: 1.1621 
2025-08-10 20:40:49,876 - INFO - Epoch: 68/500, Iter: 7/119 -- train_loss: 1.1493 
2025-08-10 20:40:50,207 - INFO - Epoch: 68/500, Iter: 8/119 -- train_loss: 1.1745 
2025-08-10 20:40:51,497 - INFO - Epoch: 68/500, Iter: 9/119 -- train_loss: 1.1607 
2025-08-10 20:40:51,827 - INFO - Epoch: 68/500, Iter: 10/119 -- train_loss: 1.1522 
2025-08-10 20:40:52,116 - INFO - Epoch: 68/500, Iter: 11/119 -- train_loss: 1.1746 
2025-08-10 20:40:52,449 - INFO - Epoch: 68/500, Iter: 12/119 -- train_loss: 1.1638 
2025-08-10 20:40:58,410 - INFO - Epoch: 68/500, Iter: 13/119 -- train_loss: 1.1664 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:44:33,929 - INFO - Epoch: 69/500, Iter: 1/119 -- train_loss: 1.1654 


[1/119]   1%|           [00:00<?]

2025-08-10 20:44:34,760 - INFO - Epoch: 69/500, Iter: 2/119 -- train_loss: 1.1699 
2025-08-10 20:44:35,053 - INFO - Epoch: 69/500, Iter: 3/119 -- train_loss: 1.1745 
2025-08-10 20:44:37,839 - INFO - Epoch: 69/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-10 20:44:38,156 - INFO - Epoch: 69/500, Iter: 5/119 -- train_loss: 1.1576 
2025-08-10 20:44:38,466 - INFO - Epoch: 69/500, Iter: 6/119 -- train_loss: 1.1489 
2025-08-10 20:44:39,852 - INFO - Epoch: 69/500, Iter: 7/119 -- train_loss: 1.1636 
2025-08-10 20:44:40,167 - INFO - Epoch: 69/500, Iter: 8/119 -- train_loss: 1.1510 
2025-08-10 20:44:42,808 - INFO - Epoch: 69/500, Iter: 9/119 -- train_loss: 1.1520 
2025-08-10 20:44:43,131 - INFO - Epoch: 69/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-10 20:44:43,414 - INFO - Epoch: 69/500, Iter: 11/119 -- train_loss: 1.1618 
2025-08-10 20:44:43,771 - INFO - Epoch: 69/500, Iter: 12/119 -- train_loss: 1.1695 
2025-08-10 20:44:44,046 - INFO - Epoch: 69/500, Iter: 13/119 -- train_loss: 1.1744 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:48:15,091 - INFO - Epoch: 70/500, Iter: 1/119 -- train_loss: 1.1562 


[1/119]   1%|           [00:00<?]

2025-08-10 20:48:15,515 - INFO - Epoch: 70/500, Iter: 2/119 -- train_loss: 1.1754 
2025-08-10 20:48:24,082 - INFO - Epoch: 70/500, Iter: 3/119 -- train_loss: 1.1744 
2025-08-10 20:48:24,330 - INFO - Epoch: 70/500, Iter: 4/119 -- train_loss: 1.1373 
2025-08-10 20:48:24,647 - INFO - Epoch: 70/500, Iter: 5/119 -- train_loss: 1.1691 
2025-08-10 20:48:24,948 - INFO - Epoch: 70/500, Iter: 6/119 -- train_loss: 1.1762 
2025-08-10 20:48:25,225 - INFO - Epoch: 70/500, Iter: 7/119 -- train_loss: 1.1502 
2025-08-10 20:48:25,535 - INFO - Epoch: 70/500, Iter: 8/119 -- train_loss: 1.1463 
2025-08-10 20:48:25,840 - INFO - Epoch: 70/500, Iter: 9/119 -- train_loss: 1.1747 
2025-08-10 20:48:26,163 - INFO - Epoch: 70/500, Iter: 10/119 -- train_loss: 1.1644 
2025-08-10 20:48:31,031 - INFO - Epoch: 70/500, Iter: 11/119 -- train_loss: 1.1343 
2025-08-10 20:48:31,339 - INFO - Epoch: 70/500, Iter: 12/119 -- train_loss: 1.1776 
2025-08-10 20:48:31,636 - INFO - Epoch: 70/500, Iter: 13/119 -- train_loss: 1.1772 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:52:01,616 - INFO - Epoch: 71/500, Iter: 1/119 -- train_loss: 1.1566 


[1/119]   1%|           [00:00<?]

2025-08-10 20:52:01,916 - INFO - Epoch: 71/500, Iter: 2/119 -- train_loss: 1.1559 
2025-08-10 20:52:02,219 - INFO - Epoch: 71/500, Iter: 3/119 -- train_loss: 1.1329 
2025-08-10 20:52:02,521 - INFO - Epoch: 71/500, Iter: 4/119 -- train_loss: 1.1745 
2025-08-10 20:52:02,866 - INFO - Epoch: 71/500, Iter: 5/119 -- train_loss: 1.1575 
2025-08-10 20:52:03,189 - INFO - Epoch: 71/500, Iter: 6/119 -- train_loss: 1.1808 
2025-08-10 20:52:03,473 - INFO - Epoch: 71/500, Iter: 7/119 -- train_loss: 1.1857 
2025-08-10 20:52:03,822 - INFO - Epoch: 71/500, Iter: 8/119 -- train_loss: 1.0674 
2025-08-10 20:52:08,161 - INFO - Epoch: 71/500, Iter: 9/119 -- train_loss: 1.1298 
2025-08-10 20:52:08,422 - INFO - Epoch: 71/500, Iter: 10/119 -- train_loss: 1.1839 
2025-08-10 20:52:08,722 - INFO - Epoch: 71/500, Iter: 11/119 -- train_loss: 1.1612 
2025-08-10 20:52:09,083 - INFO - Epoch: 71/500, Iter: 12/119 -- train_loss: 1.1764 
2025-08-10 20:52:09,368 - INFO - Epoch: 71/500, Iter: 13/119 -- train_loss: 1.1823 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:55:40,766 - INFO - Epoch: 72/500, Iter: 1/119 -- train_loss: 1.1207 


[1/119]   1%|           [00:00<?]

2025-08-10 20:55:42,107 - INFO - Epoch: 72/500, Iter: 2/119 -- train_loss: 1.1749 
2025-08-10 20:55:46,106 - INFO - Epoch: 72/500, Iter: 3/119 -- train_loss: 1.1746 
2025-08-10 20:55:46,416 - INFO - Epoch: 72/500, Iter: 4/119 -- train_loss: 1.1754 
2025-08-10 20:55:46,719 - INFO - Epoch: 72/500, Iter: 5/119 -- train_loss: 0.9523 
2025-08-10 20:55:47,034 - INFO - Epoch: 72/500, Iter: 6/119 -- train_loss: 1.1759 
2025-08-10 20:55:47,320 - INFO - Epoch: 72/500, Iter: 7/119 -- train_loss: 1.1575 
2025-08-10 20:55:47,622 - INFO - Epoch: 72/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-10 20:55:47,924 - INFO - Epoch: 72/500, Iter: 9/119 -- train_loss: 1.1560 
2025-08-10 20:55:49,090 - INFO - Epoch: 72/500, Iter: 10/119 -- train_loss: 1.1655 
2025-08-10 20:55:56,920 - INFO - Epoch: 72/500, Iter: 11/119 -- train_loss: 1.1739 
2025-08-10 20:55:57,236 - INFO - Epoch: 72/500, Iter: 12/119 -- train_loss: 1.1766 
2025-08-10 20:56:00,670 - INFO - Epoch: 72/500, Iter: 13/119 -- train_loss: 1.1744 


[1/20]   5%|5          [00:00<?]

2025-08-10 20:59:28,265 - INFO - Epoch: 73/500, Iter: 1/119 -- train_loss: 1.1702 


[1/119]   1%|           [00:00<?]

2025-08-10 20:59:28,581 - INFO - Epoch: 73/500, Iter: 2/119 -- train_loss: 1.1790 
2025-08-10 20:59:33,860 - INFO - Epoch: 73/500, Iter: 3/119 -- train_loss: 1.1424 
2025-08-10 20:59:35,237 - INFO - Epoch: 73/500, Iter: 4/119 -- train_loss: 1.1749 
2025-08-10 20:59:35,556 - INFO - Epoch: 73/500, Iter: 5/119 -- train_loss: 1.1786 
2025-08-10 20:59:35,867 - INFO - Epoch: 73/500, Iter: 6/119 -- train_loss: 1.1541 
2025-08-10 20:59:36,167 - INFO - Epoch: 73/500, Iter: 7/119 -- train_loss: 1.1746 
2025-08-10 20:59:36,497 - INFO - Epoch: 73/500, Iter: 8/119 -- train_loss: 1.1769 
2025-08-10 20:59:39,977 - INFO - Epoch: 73/500, Iter: 9/119 -- train_loss: 1.1096 
2025-08-10 20:59:40,317 - INFO - Epoch: 73/500, Iter: 10/119 -- train_loss: 1.1359 
2025-08-10 20:59:42,652 - INFO - Epoch: 73/500, Iter: 11/119 -- train_loss: 1.1652 
2025-08-10 20:59:42,952 - INFO - Epoch: 73/500, Iter: 12/119 -- train_loss: 1.1809 
2025-08-10 20:59:43,224 - INFO - Epoch: 73/500, Iter: 13/119 -- train_loss: 1.0946 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:03:15,195 - INFO - Epoch: 74/500, Iter: 1/119 -- train_loss: 1.1561 


[1/119]   1%|           [00:00<?]

2025-08-10 21:03:16,916 - INFO - Epoch: 74/500, Iter: 2/119 -- train_loss: 1.1809 
2025-08-10 21:03:22,195 - INFO - Epoch: 74/500, Iter: 3/119 -- train_loss: 1.1812 
2025-08-10 21:03:22,503 - INFO - Epoch: 74/500, Iter: 4/119 -- train_loss: 1.1810 
2025-08-10 21:03:22,781 - INFO - Epoch: 74/500, Iter: 5/119 -- train_loss: 1.1767 
2025-08-10 21:03:23,088 - INFO - Epoch: 74/500, Iter: 6/119 -- train_loss: 1.1608 
2025-08-10 21:03:23,374 - INFO - Epoch: 74/500, Iter: 7/119 -- train_loss: 1.1576 
2025-08-10 21:03:23,687 - INFO - Epoch: 74/500, Iter: 8/119 -- train_loss: 1.1792 
2025-08-10 21:03:23,977 - INFO - Epoch: 74/500, Iter: 9/119 -- train_loss: 1.1754 
2025-08-10 21:03:26,667 - INFO - Epoch: 74/500, Iter: 10/119 -- train_loss: 1.1777 
2025-08-10 21:03:29,760 - INFO - Epoch: 74/500, Iter: 11/119 -- train_loss: 1.1633 
2025-08-10 21:03:30,036 - INFO - Epoch: 74/500, Iter: 12/119 -- train_loss: 1.1749 
2025-08-10 21:03:30,344 - INFO - Epoch: 74/500, Iter: 13/119 -- train_loss: 1.1560 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:07:04,353 - INFO - Epoch: 75/500, Iter: 1/119 -- train_loss: 1.1761 


[1/119]   1%|           [00:00<?]

2025-08-10 21:07:04,692 - INFO - Epoch: 75/500, Iter: 2/119 -- train_loss: 1.1796 
2025-08-10 21:07:05,023 - INFO - Epoch: 75/500, Iter: 3/119 -- train_loss: 1.1599 
2025-08-10 21:07:05,319 - INFO - Epoch: 75/500, Iter: 4/119 -- train_loss: 1.1651 
2025-08-10 21:07:05,596 - INFO - Epoch: 75/500, Iter: 5/119 -- train_loss: 1.1110 
2025-08-10 21:07:05,872 - INFO - Epoch: 75/500, Iter: 6/119 -- train_loss: 1.1706 
2025-08-10 21:07:06,202 - INFO - Epoch: 75/500, Iter: 7/119 -- train_loss: 1.1747 
2025-08-10 21:07:06,486 - INFO - Epoch: 75/500, Iter: 8/119 -- train_loss: 1.1673 
2025-08-10 21:07:13,601 - INFO - Epoch: 75/500, Iter: 9/119 -- train_loss: 1.1886 
2025-08-10 21:07:13,908 - INFO - Epoch: 75/500, Iter: 10/119 -- train_loss: 1.1708 
2025-08-10 21:07:14,179 - INFO - Epoch: 75/500, Iter: 11/119 -- train_loss: 1.1475 
2025-08-10 21:07:14,485 - INFO - Epoch: 75/500, Iter: 12/119 -- train_loss: 1.1728 
2025-08-10 21:07:14,794 - INFO - Epoch: 75/500, Iter: 13/119 -- train_loss: 1.1744 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:10:45,842 - INFO - Epoch: 76/500, Iter: 1/119 -- train_loss: 1.1726 


[1/119]   1%|           [00:00<?]

2025-08-10 21:10:46,159 - INFO - Epoch: 76/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-10 21:10:48,544 - INFO - Epoch: 76/500, Iter: 3/119 -- train_loss: 1.1746 
2025-08-10 21:10:48,833 - INFO - Epoch: 76/500, Iter: 4/119 -- train_loss: 1.1655 
2025-08-10 21:10:49,172 - INFO - Epoch: 76/500, Iter: 5/119 -- train_loss: 1.1754 
2025-08-10 21:10:49,449 - INFO - Epoch: 76/500, Iter: 6/119 -- train_loss: 1.0970 
2025-08-10 21:10:49,764 - INFO - Epoch: 76/500, Iter: 7/119 -- train_loss: 1.1676 
2025-08-10 21:10:50,087 - INFO - Epoch: 76/500, Iter: 8/119 -- train_loss: 1.1511 
2025-08-10 21:10:53,798 - INFO - Epoch: 76/500, Iter: 9/119 -- train_loss: 1.1453 
2025-08-10 21:10:54,416 - INFO - Epoch: 76/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-10 21:10:55,693 - INFO - Epoch: 76/500, Iter: 11/119 -- train_loss: 1.1814 
2025-08-10 21:10:56,012 - INFO - Epoch: 76/500, Iter: 12/119 -- train_loss: 1.1461 
2025-08-10 21:10:56,330 - INFO - Epoch: 76/500, Iter: 13/119 -- train_loss: 1.1509 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:14:37,213 - INFO - Epoch: 77/500, Iter: 1/119 -- train_loss: 1.0432 


[1/119]   1%|           [00:00<?]

2025-08-10 21:14:37,517 - INFO - Epoch: 77/500, Iter: 2/119 -- train_loss: 1.1754 
2025-08-10 21:14:37,826 - INFO - Epoch: 77/500, Iter: 3/119 -- train_loss: 1.1776 
2025-08-10 21:14:38,150 - INFO - Epoch: 77/500, Iter: 4/119 -- train_loss: 1.1734 
2025-08-10 21:14:38,467 - INFO - Epoch: 77/500, Iter: 5/119 -- train_loss: 1.1279 
2025-08-10 21:14:38,776 - INFO - Epoch: 77/500, Iter: 6/119 -- train_loss: 1.1725 
2025-08-10 21:14:39,103 - INFO - Epoch: 77/500, Iter: 7/119 -- train_loss: 1.1378 
2025-08-10 21:14:39,421 - INFO - Epoch: 77/500, Iter: 8/119 -- train_loss: 1.1494 
2025-08-10 21:14:46,022 - INFO - Epoch: 77/500, Iter: 9/119 -- train_loss: 1.1593 
2025-08-10 21:14:46,272 - INFO - Epoch: 77/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-10 21:14:46,546 - INFO - Epoch: 77/500, Iter: 11/119 -- train_loss: 1.1764 
2025-08-10 21:14:46,836 - INFO - Epoch: 77/500, Iter: 12/119 -- train_loss: 1.1723 
2025-08-10 21:14:47,126 - INFO - Epoch: 77/500, Iter: 13/119 -- train_loss: 1.1744 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:18:31,097 - INFO - Epoch: 78/500, Iter: 1/119 -- train_loss: 1.1744 


[1/119]   1%|           [00:00<?]

2025-08-10 21:18:31,416 - INFO - Epoch: 78/500, Iter: 2/119 -- train_loss: 0.9188 
2025-08-10 21:18:31,712 - INFO - Epoch: 78/500, Iter: 3/119 -- train_loss: 1.1308 
2025-08-10 21:18:32,475 - INFO - Epoch: 78/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-10 21:18:32,808 - INFO - Epoch: 78/500, Iter: 5/119 -- train_loss: 1.1817 
2025-08-10 21:18:33,141 - INFO - Epoch: 78/500, Iter: 6/119 -- train_loss: 1.1710 
2025-08-10 21:18:33,458 - INFO - Epoch: 78/500, Iter: 7/119 -- train_loss: 1.1804 
2025-08-10 21:18:33,789 - INFO - Epoch: 78/500, Iter: 8/119 -- train_loss: 1.1890 
2025-08-10 21:18:43,108 - INFO - Epoch: 78/500, Iter: 9/119 -- train_loss: 1.1703 
2025-08-10 21:18:43,407 - INFO - Epoch: 78/500, Iter: 10/119 -- train_loss: 1.1849 
2025-08-10 21:18:43,697 - INFO - Epoch: 78/500, Iter: 11/119 -- train_loss: 1.1635 
2025-08-10 21:18:44,009 - INFO - Epoch: 78/500, Iter: 12/119 -- train_loss: 1.1751 
2025-08-10 21:18:44,297 - INFO - Epoch: 78/500, Iter: 13/119 -- train_loss: 1.1628 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:22:12,473 - INFO - Epoch: 79/500, Iter: 1/119 -- train_loss: 1.0366 


[1/119]   1%|           [00:00<?]

2025-08-10 21:22:18,066 - INFO - Epoch: 79/500, Iter: 2/119 -- train_loss: 1.1335 
2025-08-10 21:22:18,379 - INFO - Epoch: 79/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-10 21:22:18,686 - INFO - Epoch: 79/500, Iter: 4/119 -- train_loss: 1.1698 
2025-08-10 21:22:18,955 - INFO - Epoch: 79/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-10 21:22:19,285 - INFO - Epoch: 79/500, Iter: 6/119 -- train_loss: 1.0766 
2025-08-10 21:22:19,587 - INFO - Epoch: 79/500, Iter: 7/119 -- train_loss: 1.1644 
2025-08-10 21:22:19,906 - INFO - Epoch: 79/500, Iter: 8/119 -- train_loss: 1.1330 
2025-08-10 21:22:21,106 - INFO - Epoch: 79/500, Iter: 9/119 -- train_loss: 1.1747 
2025-08-10 21:22:22,487 - INFO - Epoch: 79/500, Iter: 10/119 -- train_loss: 1.1373 
2025-08-10 21:22:22,799 - INFO - Epoch: 79/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-10 21:22:26,441 - INFO - Epoch: 79/500, Iter: 12/119 -- train_loss: 1.1744 
2025-08-10 21:22:26,740 - INFO - Epoch: 79/500, Iter: 13/119 -- train_loss: 1.0197 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:26:01,982 - INFO - Epoch: 80/500, Iter: 1/119 -- train_loss: 1.1746 


[1/119]   1%|           [00:00<?]

2025-08-10 21:26:02,278 - INFO - Epoch: 80/500, Iter: 2/119 -- train_loss: 1.1698 
2025-08-10 21:26:02,591 - INFO - Epoch: 80/500, Iter: 3/119 -- train_loss: 0.9521 
2025-08-10 21:26:02,890 - INFO - Epoch: 80/500, Iter: 4/119 -- train_loss: 1.1237 
2025-08-10 21:26:03,205 - INFO - Epoch: 80/500, Iter: 5/119 -- train_loss: 1.1773 
2025-08-10 21:26:03,490 - INFO - Epoch: 80/500, Iter: 6/119 -- train_loss: 1.1654 
2025-08-10 21:26:03,793 - INFO - Epoch: 80/500, Iter: 7/119 -- train_loss: 1.1275 
2025-08-10 21:26:04,092 - INFO - Epoch: 80/500, Iter: 8/119 -- train_loss: 1.1709 
2025-08-10 21:26:08,957 - INFO - Epoch: 80/500, Iter: 9/119 -- train_loss: 1.1801 
2025-08-10 21:26:09,240 - INFO - Epoch: 80/500, Iter: 10/119 -- train_loss: 1.1764 
2025-08-10 21:26:09,536 - INFO - Epoch: 80/500, Iter: 11/119 -- train_loss: 1.1563 
2025-08-10 21:26:09,830 - INFO - Epoch: 80/500, Iter: 12/119 -- train_loss: 1.1244 
2025-08-10 21:26:10,117 - INFO - Epoch: 80/500, Iter: 13/119 -- train_loss: 1.1787 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:29:38,965 - INFO - Epoch: 81/500, Iter: 1/119 -- train_loss: 1.1751 


[1/119]   1%|           [00:00<?]

2025-08-10 21:29:39,301 - INFO - Epoch: 81/500, Iter: 2/119 -- train_loss: 1.1407 
2025-08-10 21:29:39,569 - INFO - Epoch: 81/500, Iter: 3/119 -- train_loss: 1.1682 
2025-08-10 21:29:39,870 - INFO - Epoch: 81/500, Iter: 4/119 -- train_loss: 1.1610 
2025-08-10 21:29:40,168 - INFO - Epoch: 81/500, Iter: 5/119 -- train_loss: 1.1436 
2025-08-10 21:29:40,468 - INFO - Epoch: 81/500, Iter: 6/119 -- train_loss: 1.1743 
2025-08-10 21:29:40,789 - INFO - Epoch: 81/500, Iter: 7/119 -- train_loss: 1.1294 
2025-08-10 21:29:41,047 - INFO - Epoch: 81/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-10 21:29:45,887 - INFO - Epoch: 81/500, Iter: 9/119 -- train_loss: 1.1756 
2025-08-10 21:29:46,203 - INFO - Epoch: 81/500, Iter: 10/119 -- train_loss: 1.1467 
2025-08-10 21:29:46,525 - INFO - Epoch: 81/500, Iter: 11/119 -- train_loss: 1.1662 
2025-08-10 21:29:46,818 - INFO - Epoch: 81/500, Iter: 12/119 -- train_loss: 1.1751 
2025-08-10 21:29:47,107 - INFO - Epoch: 81/500, Iter: 13/119 -- train_loss: 1.1750 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:33:19,212 - INFO - Epoch: 82/500, Iter: 1/119 -- train_loss: 1.1744 


[1/119]   1%|           [00:00<?]

2025-08-10 21:33:23,089 - INFO - Epoch: 82/500, Iter: 2/119 -- train_loss: 1.1786 
2025-08-10 21:33:23,365 - INFO - Epoch: 82/500, Iter: 3/119 -- train_loss: 1.1253 
2025-08-10 21:33:27,549 - INFO - Epoch: 82/500, Iter: 4/119 -- train_loss: 0.9948 
2025-08-10 21:33:27,839 - INFO - Epoch: 82/500, Iter: 5/119 -- train_loss: 1.1772 
2025-08-10 21:33:28,161 - INFO - Epoch: 82/500, Iter: 6/119 -- train_loss: 1.1486 
2025-08-10 21:33:28,483 - INFO - Epoch: 82/500, Iter: 7/119 -- train_loss: 1.1745 
2025-08-10 21:33:28,810 - INFO - Epoch: 82/500, Iter: 8/119 -- train_loss: 1.1709 
2025-08-10 21:33:29,134 - INFO - Epoch: 82/500, Iter: 9/119 -- train_loss: 1.1623 
2025-08-10 21:33:33,937 - INFO - Epoch: 82/500, Iter: 10/119 -- train_loss: 1.0549 
2025-08-10 21:33:34,217 - INFO - Epoch: 82/500, Iter: 11/119 -- train_loss: 1.1705 
2025-08-10 21:33:34,545 - INFO - Epoch: 82/500, Iter: 12/119 -- train_loss: 1.1796 
2025-08-10 21:33:34,859 - INFO - Epoch: 82/500, Iter: 13/119 -- train_loss: 1.1423 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:37:04,727 - INFO - Epoch: 83/500, Iter: 1/119 -- train_loss: 1.1745 


[1/119]   1%|           [00:00<?]

2025-08-10 21:37:05,042 - INFO - Epoch: 83/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-10 21:37:09,286 - INFO - Epoch: 83/500, Iter: 3/119 -- train_loss: 1.1733 
2025-08-10 21:37:14,078 - INFO - Epoch: 83/500, Iter: 4/119 -- train_loss: 1.1531 
2025-08-10 21:37:14,683 - INFO - Epoch: 83/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-10 21:37:14,979 - INFO - Epoch: 83/500, Iter: 6/119 -- train_loss: 1.1346 
2025-08-10 21:37:15,284 - INFO - Epoch: 83/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-10 21:37:15,621 - INFO - Epoch: 83/500, Iter: 8/119 -- train_loss: 1.1600 
2025-08-10 21:37:15,950 - INFO - Epoch: 83/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-10 21:37:16,271 - INFO - Epoch: 83/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-10 21:37:16,584 - INFO - Epoch: 83/500, Iter: 11/119 -- train_loss: 1.1755 
2025-08-10 21:37:23,099 - INFO - Epoch: 83/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-10 21:37:23,389 - INFO - Epoch: 83/500, Iter: 13/119 -- train_loss: 1.1638 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:40:39,839 - INFO - Epoch: 84/500, Iter: 1/119 -- train_loss: 1.1278 


[1/119]   1%|           [00:00<?]

2025-08-10 21:40:40,444 - INFO - Epoch: 84/500, Iter: 2/119 -- train_loss: 1.1482 
2025-08-10 21:40:40,728 - INFO - Epoch: 84/500, Iter: 3/119 -- train_loss: 1.1562 
2025-08-10 21:40:41,011 - INFO - Epoch: 84/500, Iter: 4/119 -- train_loss: 1.1654 
2025-08-10 21:40:41,276 - INFO - Epoch: 84/500, Iter: 5/119 -- train_loss: 1.1726 
2025-08-10 21:40:41,540 - INFO - Epoch: 84/500, Iter: 6/119 -- train_loss: 1.0816 
2025-08-10 21:40:41,843 - INFO - Epoch: 84/500, Iter: 7/119 -- train_loss: 1.1771 
2025-08-10 21:40:42,135 - INFO - Epoch: 84/500, Iter: 8/119 -- train_loss: 1.1745 
2025-08-10 21:40:44,435 - INFO - Epoch: 84/500, Iter: 9/119 -- train_loss: 1.1758 
2025-08-10 21:40:44,753 - INFO - Epoch: 84/500, Iter: 10/119 -- train_loss: 1.1792 
2025-08-10 21:40:45,060 - INFO - Epoch: 84/500, Iter: 11/119 -- train_loss: 1.1223 
2025-08-10 21:40:45,355 - INFO - Epoch: 84/500, Iter: 12/119 -- train_loss: 1.1478 
2025-08-10 21:40:45,648 - INFO - Epoch: 84/500, Iter: 13/119 -- train_loss: 1.1750 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:43:58,300 - INFO - Epoch: 85/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-10 21:44:03,095 - INFO - Epoch: 85/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-10 21:44:03,383 - INFO - Epoch: 85/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-10 21:44:03,677 - INFO - Epoch: 85/500, Iter: 4/119 -- train_loss: 1.0849 
2025-08-10 21:44:03,964 - INFO - Epoch: 85/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-10 21:44:04,242 - INFO - Epoch: 85/500, Iter: 6/119 -- train_loss: 1.1750 
2025-08-10 21:44:04,526 - INFO - Epoch: 85/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-10 21:44:04,812 - INFO - Epoch: 85/500, Iter: 8/119 -- train_loss: 1.1686 
2025-08-10 21:44:05,132 - INFO - Epoch: 85/500, Iter: 9/119 -- train_loss: 1.0558 
2025-08-10 21:44:10,860 - INFO - Epoch: 85/500, Iter: 10/119 -- train_loss: 1.1574 
2025-08-10 21:44:11,150 - INFO - Epoch: 85/500, Iter: 11/119 -- train_loss: 1.1646 
2025-08-10 21:44:11,462 - INFO - Epoch: 85/500, Iter: 12/119 -- train_loss: 1.1674 
2025-08-10 21:44:11,762 - INFO - Epoch: 85/500, Iter: 13/119 -- train_loss: 1.1762 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:47:46,653 - INFO - Epoch: 86/500, Iter: 1/119 -- train_loss: 1.1478 


[1/119]   1%|           [00:00<?]

2025-08-10 21:47:46,971 - INFO - Epoch: 86/500, Iter: 2/119 -- train_loss: 1.1747 
2025-08-10 21:47:47,387 - INFO - Epoch: 86/500, Iter: 3/119 -- train_loss: 1.1764 
2025-08-10 21:47:48,675 - INFO - Epoch: 86/500, Iter: 4/119 -- train_loss: 1.1747 
2025-08-10 21:47:48,969 - INFO - Epoch: 86/500, Iter: 5/119 -- train_loss: 1.1746 
2025-08-10 21:47:49,277 - INFO - Epoch: 86/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-10 21:47:51,909 - INFO - Epoch: 86/500, Iter: 7/119 -- train_loss: 1.1762 
2025-08-10 21:47:52,203 - INFO - Epoch: 86/500, Iter: 8/119 -- train_loss: 1.1388 
2025-08-10 21:47:57,627 - INFO - Epoch: 86/500, Iter: 9/119 -- train_loss: 1.1778 
2025-08-10 21:47:57,924 - INFO - Epoch: 86/500, Iter: 10/119 -- train_loss: 1.1479 
2025-08-10 21:47:58,261 - INFO - Epoch: 86/500, Iter: 11/119 -- train_loss: 1.1434 
2025-08-10 21:47:58,573 - INFO - Epoch: 86/500, Iter: 12/119 -- train_loss: 1.1541 
2025-08-10 21:47:58,889 - INFO - Epoch: 86/500, Iter: 13/119 -- train_loss: 1.1710 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:51:36,777 - INFO - Epoch: 87/500, Iter: 1/119 -- train_loss: 1.1666 


[1/119]   1%|           [00:00<?]

2025-08-10 21:51:37,096 - INFO - Epoch: 87/500, Iter: 2/119 -- train_loss: 1.1673 
2025-08-10 21:51:38,247 - INFO - Epoch: 87/500, Iter: 3/119 -- train_loss: 1.1935 
2025-08-10 21:51:38,563 - INFO - Epoch: 87/500, Iter: 4/119 -- train_loss: 1.1781 
2025-08-10 21:51:38,877 - INFO - Epoch: 87/500, Iter: 5/119 -- train_loss: 1.1776 
2025-08-10 21:51:39,222 - INFO - Epoch: 87/500, Iter: 6/119 -- train_loss: 1.1552 
2025-08-10 21:51:39,511 - INFO - Epoch: 87/500, Iter: 7/119 -- train_loss: 1.1096 
2025-08-10 21:51:39,822 - INFO - Epoch: 87/500, Iter: 8/119 -- train_loss: 1.1745 
2025-08-10 21:51:47,397 - INFO - Epoch: 87/500, Iter: 9/119 -- train_loss: 1.1642 
2025-08-10 21:51:47,689 - INFO - Epoch: 87/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-10 21:51:47,966 - INFO - Epoch: 87/500, Iter: 11/119 -- train_loss: 1.1771 
2025-08-10 21:51:48,259 - INFO - Epoch: 87/500, Iter: 12/119 -- train_loss: 1.1636 
2025-08-10 21:51:48,579 - INFO - Epoch: 87/500, Iter: 13/119 -- train_loss: 1.1667 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:55:28,914 - INFO - Epoch: 88/500, Iter: 1/119 -- train_loss: 1.1601 


[1/119]   1%|           [00:00<?]

2025-08-10 21:55:30,029 - INFO - Epoch: 88/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-10 21:55:30,854 - INFO - Epoch: 88/500, Iter: 3/119 -- train_loss: 1.1750 
2025-08-10 21:55:31,195 - INFO - Epoch: 88/500, Iter: 4/119 -- train_loss: 1.1772 
2025-08-10 21:55:31,519 - INFO - Epoch: 88/500, Iter: 5/119 -- train_loss: 1.1746 
2025-08-10 21:55:31,822 - INFO - Epoch: 88/500, Iter: 6/119 -- train_loss: 1.1749 
2025-08-10 21:55:34,226 - INFO - Epoch: 88/500, Iter: 7/119 -- train_loss: 1.0308 
2025-08-10 21:55:34,546 - INFO - Epoch: 88/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-10 21:55:34,870 - INFO - Epoch: 88/500, Iter: 9/119 -- train_loss: 1.1686 
2025-08-10 21:55:35,165 - INFO - Epoch: 88/500, Iter: 10/119 -- train_loss: 1.1642 
2025-08-10 21:55:35,872 - INFO - Epoch: 88/500, Iter: 11/119 -- train_loss: 1.1219 
2025-08-10 21:55:44,280 - INFO - Epoch: 88/500, Iter: 12/119 -- train_loss: 1.1664 
2025-08-10 21:55:44,567 - INFO - Epoch: 88/500, Iter: 13/119 -- train_loss: 1.1743 


[1/20]   5%|5          [00:00<?]

2025-08-10 21:59:26,409 - INFO - Epoch: 89/500, Iter: 1/119 -- train_loss: 1.1722 


[1/119]   1%|           [00:00<?]

2025-08-10 21:59:26,714 - INFO - Epoch: 89/500, Iter: 2/119 -- train_loss: 1.1748 
2025-08-10 21:59:26,992 - INFO - Epoch: 89/500, Iter: 3/119 -- train_loss: 1.1758 
2025-08-10 21:59:27,318 - INFO - Epoch: 89/500, Iter: 4/119 -- train_loss: 1.1648 
2025-08-10 21:59:27,618 - INFO - Epoch: 89/500, Iter: 5/119 -- train_loss: 1.1796 
2025-08-10 21:59:27,947 - INFO - Epoch: 89/500, Iter: 6/119 -- train_loss: 1.1495 
2025-08-10 21:59:28,219 - INFO - Epoch: 89/500, Iter: 7/119 -- train_loss: 0.9918 
2025-08-10 21:59:28,502 - INFO - Epoch: 89/500, Iter: 8/119 -- train_loss: 1.0864 
2025-08-10 21:59:39,008 - INFO - Epoch: 89/500, Iter: 9/119 -- train_loss: 1.1719 
2025-08-10 21:59:39,291 - INFO - Epoch: 89/500, Iter: 10/119 -- train_loss: 1.1765 
2025-08-10 21:59:39,551 - INFO - Epoch: 89/500, Iter: 11/119 -- train_loss: 1.1808 
2025-08-10 21:59:39,864 - INFO - Epoch: 89/500, Iter: 12/119 -- train_loss: 1.1584 
2025-08-10 21:59:40,141 - INFO - Epoch: 89/500, Iter: 13/119 -- train_loss: 1.1462 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:03:16,185 - INFO - Epoch: 90/500, Iter: 1/119 -- train_loss: 1.1752 


[1/119]   1%|           [00:00<?]

2025-08-10 22:03:16,441 - INFO - Epoch: 90/500, Iter: 2/119 -- train_loss: 1.1748 
2025-08-10 22:03:16,700 - INFO - Epoch: 90/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-10 22:03:16,982 - INFO - Epoch: 90/500, Iter: 4/119 -- train_loss: 1.1379 
2025-08-10 22:03:17,242 - INFO - Epoch: 90/500, Iter: 5/119 -- train_loss: 1.1639 
2025-08-10 22:03:17,516 - INFO - Epoch: 90/500, Iter: 6/119 -- train_loss: 1.1743 
2025-08-10 22:03:17,816 - INFO - Epoch: 90/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-10 22:03:18,143 - INFO - Epoch: 90/500, Iter: 8/119 -- train_loss: 1.1446 
2025-08-10 22:03:22,727 - INFO - Epoch: 90/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-10 22:03:23,036 - INFO - Epoch: 90/500, Iter: 10/119 -- train_loss: 1.1559 
2025-08-10 22:03:23,346 - INFO - Epoch: 90/500, Iter: 11/119 -- train_loss: 1.1463 
2025-08-10 22:03:23,619 - INFO - Epoch: 90/500, Iter: 12/119 -- train_loss: 1.1769 
2025-08-10 22:03:23,902 - INFO - Epoch: 90/500, Iter: 13/119 -- train_loss: 1.1743 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:06:54,707 - INFO - Epoch: 91/500, Iter: 1/119 -- train_loss: 1.1772 


[1/119]   1%|           [00:00<?]

2025-08-10 22:06:55,046 - INFO - Epoch: 91/500, Iter: 2/119 -- train_loss: 1.1206 
2025-08-10 22:06:55,313 - INFO - Epoch: 91/500, Iter: 3/119 -- train_loss: 1.1660 
2025-08-10 22:06:55,684 - INFO - Epoch: 91/500, Iter: 4/119 -- train_loss: 1.1754 
2025-08-10 22:06:55,957 - INFO - Epoch: 91/500, Iter: 5/119 -- train_loss: 1.1647 
2025-08-10 22:06:56,917 - INFO - Epoch: 91/500, Iter: 6/119 -- train_loss: 1.1675 
2025-08-10 22:06:57,207 - INFO - Epoch: 91/500, Iter: 7/119 -- train_loss: 1.1756 
2025-08-10 22:06:59,540 - INFO - Epoch: 91/500, Iter: 8/119 -- train_loss: 1.1452 
2025-08-10 22:06:59,870 - INFO - Epoch: 91/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-10 22:07:00,157 - INFO - Epoch: 91/500, Iter: 10/119 -- train_loss: 1.1291 
2025-08-10 22:07:01,688 - INFO - Epoch: 91/500, Iter: 11/119 -- train_loss: 1.1761 
2025-08-10 22:07:01,974 - INFO - Epoch: 91/500, Iter: 12/119 -- train_loss: 1.1597 
2025-08-10 22:07:02,297 - INFO - Epoch: 91/500, Iter: 13/119 -- train_loss: 1.1205 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:10:37,410 - INFO - Epoch: 92/500, Iter: 1/119 -- train_loss: 1.1671 


[1/119]   1%|           [00:00<?]

2025-08-10 22:10:37,740 - INFO - Epoch: 92/500, Iter: 2/119 -- train_loss: 1.1769 
2025-08-10 22:10:38,048 - INFO - Epoch: 92/500, Iter: 3/119 -- train_loss: 1.1777 
2025-08-10 22:10:38,363 - INFO - Epoch: 92/500, Iter: 4/119 -- train_loss: 1.1796 
2025-08-10 22:10:38,644 - INFO - Epoch: 92/500, Iter: 5/119 -- train_loss: 1.1755 
2025-08-10 22:10:38,945 - INFO - Epoch: 92/500, Iter: 6/119 -- train_loss: 1.1725 
2025-08-10 22:10:39,270 - INFO - Epoch: 92/500, Iter: 7/119 -- train_loss: 1.1251 
2025-08-10 22:10:39,577 - INFO - Epoch: 92/500, Iter: 8/119 -- train_loss: 1.1606 
2025-08-10 22:10:47,415 - INFO - Epoch: 92/500, Iter: 9/119 -- train_loss: 1.1703 
2025-08-10 22:10:47,712 - INFO - Epoch: 92/500, Iter: 10/119 -- train_loss: 1.1557 
2025-08-10 22:10:47,981 - INFO - Epoch: 92/500, Iter: 11/119 -- train_loss: 1.1593 
2025-08-10 22:10:48,281 - INFO - Epoch: 92/500, Iter: 12/119 -- train_loss: 1.1704 
2025-08-10 22:10:48,586 - INFO - Epoch: 92/500, Iter: 13/119 -- train_loss: 1.1745 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:14:29,929 - INFO - Epoch: 93/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-10 22:14:30,242 - INFO - Epoch: 93/500, Iter: 2/119 -- train_loss: 0.9614 
2025-08-10 22:14:30,568 - INFO - Epoch: 93/500, Iter: 3/119 -- train_loss: 1.1709 
2025-08-10 22:14:30,882 - INFO - Epoch: 93/500, Iter: 4/119 -- train_loss: 1.1635 
2025-08-10 22:14:31,209 - INFO - Epoch: 93/500, Iter: 5/119 -- train_loss: 1.1839 
2025-08-10 22:14:31,517 - INFO - Epoch: 93/500, Iter: 6/119 -- train_loss: 1.1534 
2025-08-10 22:14:31,815 - INFO - Epoch: 93/500, Iter: 7/119 -- train_loss: 1.1807 
2025-08-10 22:14:32,131 - INFO - Epoch: 93/500, Iter: 8/119 -- train_loss: 1.1580 
2025-08-10 22:14:34,048 - INFO - Epoch: 93/500, Iter: 9/119 -- train_loss: 1.1239 
2025-08-10 22:14:34,323 - INFO - Epoch: 93/500, Iter: 10/119 -- train_loss: 1.1799 
2025-08-10 22:14:34,648 - INFO - Epoch: 93/500, Iter: 11/119 -- train_loss: 1.1265 
2025-08-10 22:14:34,972 - INFO - Epoch: 93/500, Iter: 12/119 -- train_loss: 1.1821 
2025-08-10 22:14:35,277 - INFO - Epoch: 93/500, Iter: 13/119 -- train_loss: 1.1751 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:18:13,535 - INFO - Epoch: 94/500, Iter: 1/119 -- train_loss: 1.1659 


[1/119]   1%|           [00:00<?]

2025-08-10 22:18:13,830 - INFO - Epoch: 94/500, Iter: 2/119 -- train_loss: 1.1754 
2025-08-10 22:18:14,133 - INFO - Epoch: 94/500, Iter: 3/119 -- train_loss: 1.1756 
2025-08-10 22:18:17,016 - INFO - Epoch: 94/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-10 22:18:17,348 - INFO - Epoch: 94/500, Iter: 5/119 -- train_loss: 1.1409 
2025-08-10 22:18:17,620 - INFO - Epoch: 94/500, Iter: 6/119 -- train_loss: 1.1321 
2025-08-10 22:18:17,937 - INFO - Epoch: 94/500, Iter: 7/119 -- train_loss: 1.1722 
2025-08-10 22:18:18,306 - INFO - Epoch: 94/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-10 22:18:21,584 - INFO - Epoch: 94/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-10 22:18:21,873 - INFO - Epoch: 94/500, Iter: 10/119 -- train_loss: 1.1745 
2025-08-10 22:18:22,183 - INFO - Epoch: 94/500, Iter: 11/119 -- train_loss: 1.1423 
2025-08-10 22:18:23,046 - INFO - Epoch: 94/500, Iter: 12/119 -- train_loss: 1.1650 
2025-08-10 22:18:23,360 - INFO - Epoch: 94/500, Iter: 13/119 -- train_loss: 1.1769 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:22:01,204 - INFO - Epoch: 95/500, Iter: 1/119 -- train_loss: 1.1751 


[1/119]   1%|           [00:00<?]

2025-08-10 22:22:01,545 - INFO - Epoch: 95/500, Iter: 2/119 -- train_loss: 1.1441 
2025-08-10 22:22:01,824 - INFO - Epoch: 95/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-10 22:22:02,126 - INFO - Epoch: 95/500, Iter: 4/119 -- train_loss: 1.1671 
2025-08-10 22:22:03,371 - INFO - Epoch: 95/500, Iter: 5/119 -- train_loss: 1.1235 
2025-08-10 22:22:03,681 - INFO - Epoch: 95/500, Iter: 6/119 -- train_loss: 1.1689 
2025-08-10 22:22:03,970 - INFO - Epoch: 95/500, Iter: 7/119 -- train_loss: 1.1747 
2025-08-10 22:22:04,282 - INFO - Epoch: 95/500, Iter: 8/119 -- train_loss: 1.1634 
2025-08-10 22:22:09,808 - INFO - Epoch: 95/500, Iter: 9/119 -- train_loss: 1.1520 
2025-08-10 22:22:10,087 - INFO - Epoch: 95/500, Iter: 10/119 -- train_loss: 1.1745 
2025-08-10 22:22:10,380 - INFO - Epoch: 95/500, Iter: 11/119 -- train_loss: 1.1330 
2025-08-10 22:22:10,706 - INFO - Epoch: 95/500, Iter: 12/119 -- train_loss: 1.1522 
2025-08-10 22:22:10,983 - INFO - Epoch: 95/500, Iter: 13/119 -- train_loss: 1.0478 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:25:40,745 - INFO - Epoch: 96/500, Iter: 1/119 -- train_loss: 1.1767 


[1/119]   1%|           [00:00<?]

2025-08-10 22:25:41,044 - INFO - Epoch: 96/500, Iter: 2/119 -- train_loss: 1.1554 
2025-08-10 22:25:48,751 - INFO - Epoch: 96/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-10 22:25:49,008 - INFO - Epoch: 96/500, Iter: 4/119 -- train_loss: 1.1426 
2025-08-10 22:25:49,292 - INFO - Epoch: 96/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-10 22:25:49,578 - INFO - Epoch: 96/500, Iter: 6/119 -- train_loss: 1.1756 
2025-08-10 22:25:49,866 - INFO - Epoch: 96/500, Iter: 7/119 -- train_loss: 1.1609 
2025-08-10 22:25:50,155 - INFO - Epoch: 96/500, Iter: 8/119 -- train_loss: 1.1586 
2025-08-10 22:25:50,467 - INFO - Epoch: 96/500, Iter: 9/119 -- train_loss: 1.1655 
2025-08-10 22:25:50,815 - INFO - Epoch: 96/500, Iter: 10/119 -- train_loss: 1.0707 
2025-08-10 22:25:56,522 - INFO - Epoch: 96/500, Iter: 11/119 -- train_loss: 1.1677 
2025-08-10 22:25:56,812 - INFO - Epoch: 96/500, Iter: 12/119 -- train_loss: 1.1417 
2025-08-10 22:25:57,101 - INFO - Epoch: 96/500, Iter: 13/119 -- train_loss: 1.1418 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:29:31,084 - INFO - Epoch: 97/500, Iter: 1/119 -- train_loss: 1.1745 


[1/119]   1%|           [00:00<?]

2025-08-10 22:29:31,406 - INFO - Epoch: 97/500, Iter: 2/119 -- train_loss: 1.1342 
2025-08-10 22:29:31,687 - INFO - Epoch: 97/500, Iter: 3/119 -- train_loss: 1.1748 
2025-08-10 22:29:31,954 - INFO - Epoch: 97/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-10 22:29:32,235 - INFO - Epoch: 97/500, Iter: 5/119 -- train_loss: 1.1512 
2025-08-10 22:29:32,515 - INFO - Epoch: 97/500, Iter: 6/119 -- train_loss: 1.1745 
2025-08-10 22:29:32,805 - INFO - Epoch: 97/500, Iter: 7/119 -- train_loss: 1.1436 
2025-08-10 22:29:33,079 - INFO - Epoch: 97/500, Iter: 8/119 -- train_loss: 1.1639 
2025-08-10 22:29:36,017 - INFO - Epoch: 97/500, Iter: 9/119 -- train_loss: 1.1608 
2025-08-10 22:29:36,330 - INFO - Epoch: 97/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-10 22:29:36,627 - INFO - Epoch: 97/500, Iter: 11/119 -- train_loss: 1.1551 
2025-08-10 22:29:36,905 - INFO - Epoch: 97/500, Iter: 12/119 -- train_loss: 1.1604 
2025-08-10 22:29:37,174 - INFO - Epoch: 97/500, Iter: 13/119 -- train_loss: 1.1742 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:32:55,695 - INFO - Epoch: 98/500, Iter: 1/119 -- train_loss: 1.1837 


[1/119]   1%|           [00:00<?]

2025-08-10 22:32:59,191 - INFO - Epoch: 98/500, Iter: 2/119 -- train_loss: 1.1746 
2025-08-10 22:32:59,494 - INFO - Epoch: 98/500, Iter: 3/119 -- train_loss: 1.1683 
2025-08-10 22:32:59,799 - INFO - Epoch: 98/500, Iter: 4/119 -- train_loss: 1.1746 
2025-08-10 22:33:00,097 - INFO - Epoch: 98/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-10 22:33:00,421 - INFO - Epoch: 98/500, Iter: 6/119 -- train_loss: 1.1515 
2025-08-10 22:33:00,731 - INFO - Epoch: 98/500, Iter: 7/119 -- train_loss: 1.1731 
2025-08-10 22:33:01,083 - INFO - Epoch: 98/500, Iter: 8/119 -- train_loss: 1.1586 
2025-08-10 22:33:01,392 - INFO - Epoch: 98/500, Iter: 9/119 -- train_loss: 1.1479 
2025-08-10 22:33:04,529 - INFO - Epoch: 98/500, Iter: 10/119 -- train_loss: 1.1722 
2025-08-10 22:33:04,833 - INFO - Epoch: 98/500, Iter: 11/119 -- train_loss: 1.1688 
2025-08-10 22:33:05,139 - INFO - Epoch: 98/500, Iter: 12/119 -- train_loss: 1.1701 
2025-08-10 22:33:05,443 - INFO - Epoch: 98/500, Iter: 13/119 -- train_loss: 1.1761 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:36:40,386 - INFO - Epoch: 99/500, Iter: 1/119 -- train_loss: 1.1525 


[1/119]   1%|           [00:00<?]

2025-08-10 22:36:40,968 - INFO - Epoch: 99/500, Iter: 2/119 -- train_loss: 1.1946 
2025-08-10 22:36:41,259 - INFO - Epoch: 99/500, Iter: 3/119 -- train_loss: 1.1683 
2025-08-10 22:36:41,578 - INFO - Epoch: 99/500, Iter: 4/119 -- train_loss: 1.1750 
2025-08-10 22:36:42,012 - INFO - Epoch: 99/500, Iter: 5/119 -- train_loss: 1.0233 
2025-08-10 22:36:44,073 - INFO - Epoch: 99/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-10 22:36:47,120 - INFO - Epoch: 99/500, Iter: 7/119 -- train_loss: 1.1437 
2025-08-10 22:36:47,390 - INFO - Epoch: 99/500, Iter: 8/119 -- train_loss: 1.1575 
2025-08-10 22:36:47,689 - INFO - Epoch: 99/500, Iter: 9/119 -- train_loss: 1.1753 
2025-08-10 22:36:47,987 - INFO - Epoch: 99/500, Iter: 10/119 -- train_loss: 1.1684 
2025-08-10 22:36:48,309 - INFO - Epoch: 99/500, Iter: 11/119 -- train_loss: 1.1413 
2025-08-10 22:36:49,191 - INFO - Epoch: 99/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-10 22:36:49,534 - INFO - Epoch: 99/500, Iter: 13/119 -- train_loss: 1.1718 


[1/20]   5%|5          [00:00<?]

2025-08-10 22:40:29,312 - INFO - Epoch: 100/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-10 22:40:33,449 - INFO - Epoch: 100/500, Iter: 2/119 -- train_loss: 1.1744 
2025-08-10 22:40:33,757 - INFO - Epoch: 100/500, Iter: 3/119 -- train_loss: 1.1747 
2025-08-10 22:40:34,050 - INFO - Epoch: 100/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-10 22:40:34,921 - INFO - Epoch: 100/500, Iter: 5/119 -- train_loss: 1.1648 
2025-08-10 22:40:35,240 - INFO - Epoch: 100/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-10 22:40:35,987 - INFO - Epoch: 100/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-10 22:40:36,325 - INFO - Epoch: 100/500, Iter: 8/119 -- train_loss: 1.1746 
2025-08-10 22:40:40,878 - INFO - Epoch: 100/500, Iter: 9/119 -- train_loss: 1.1751 
2025-08-10 22:40:41,198 - INFO - Epoch: 100/500, Iter: 10/119 -- train_loss: 1.1585 
2025-08-10 22:40:41,499 - INFO - Epoch: 100/500, Iter: 11/119 -- train_loss: 1.1617 
2025-08-10 22:40:41,786 - INFO - Epoch: 100/500, Iter: 12/119 -- train_loss: 1.1312 
2025-08-10 22:40:42,084 - INFO - Epoch: 100/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 22:44:16,601 - INFO - Epoch: 101/500, Iter: 1/119 -- train_loss: 1.1762 


[1/119]   1%|           [00:00<?]

2025-08-10 22:44:22,037 - INFO - Epoch: 101/500, Iter: 2/119 -- train_loss: 0.9488 
2025-08-10 22:44:22,373 - INFO - Epoch: 101/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-10 22:44:22,684 - INFO - Epoch: 101/500, Iter: 4/119 -- train_loss: 1.1243 
2025-08-10 22:44:23,008 - INFO - Epoch: 101/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-10 22:44:23,338 - INFO - Epoch: 101/500, Iter: 6/119 -- train_loss: 1.1785 
2025-08-10 22:44:23,625 - INFO - Epoch: 101/500, Iter: 7/119 -- train_loss: 1.1652 
2025-08-10 22:44:23,940 - INFO - Epoch: 101/500, Iter: 8/119 -- train_loss: 1.1748 
2025-08-10 22:44:24,248 - INFO - Epoch: 101/500, Iter: 9/119 -- train_loss: 1.1752 
2025-08-10 22:44:31,998 - INFO - Epoch: 101/500, Iter: 10/119 -- train_loss: 1.0375 
2025-08-10 22:44:32,253 - INFO - Epoch: 101/500, Iter: 11/119 -- train_loss: 1.1795 
2025-08-10 22:44:32,516 - INFO - Epoch: 101/500, Iter: 12/119 -- train_loss: 1.1761 
2025-08-10 22:44:32,802 - INFO - Epoch: 101/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 22:48:03,358 - INFO - Epoch: 102/500, Iter: 1/119 -- train_loss: 1.1699 


[1/119]   1%|           [00:00<?]

2025-08-10 22:48:07,342 - INFO - Epoch: 102/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-10 22:48:07,659 - INFO - Epoch: 102/500, Iter: 3/119 -- train_loss: 1.1652 
2025-08-10 22:48:07,952 - INFO - Epoch: 102/500, Iter: 4/119 -- train_loss: 1.1368 
2025-08-10 22:48:08,281 - INFO - Epoch: 102/500, Iter: 5/119 -- train_loss: 1.1543 
2025-08-10 22:48:08,603 - INFO - Epoch: 102/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-10 22:48:08,938 - INFO - Epoch: 102/500, Iter: 7/119 -- train_loss: 1.1624 
2025-08-10 22:48:09,243 - INFO - Epoch: 102/500, Iter: 8/119 -- train_loss: 1.0604 
2025-08-10 22:48:12,078 - INFO - Epoch: 102/500, Iter: 9/119 -- train_loss: 1.1718 
2025-08-10 22:48:15,731 - INFO - Epoch: 102/500, Iter: 10/119 -- train_loss: 1.1745 
2025-08-10 22:48:16,040 - INFO - Epoch: 102/500, Iter: 11/119 -- train_loss: 1.1778 
2025-08-10 22:48:16,397 - INFO - Epoch: 102/500, Iter: 12/119 -- train_loss: 1.1640 
2025-08-10 22:48:16,725 - INFO - Epoch: 102/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 22:51:55,894 - INFO - Epoch: 103/500, Iter: 1/119 -- train_loss: 1.1786 


[1/119]   1%|           [00:00<?]

2025-08-10 22:52:03,527 - INFO - Epoch: 103/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-10 22:52:03,837 - INFO - Epoch: 103/500, Iter: 3/119 -- train_loss: 1.1779 
2025-08-10 22:52:04,134 - INFO - Epoch: 103/500, Iter: 4/119 -- train_loss: 1.1520 
2025-08-10 22:52:04,415 - INFO - Epoch: 103/500, Iter: 5/119 -- train_loss: 1.1734 
2025-08-10 22:52:04,731 - INFO - Epoch: 103/500, Iter: 6/119 -- train_loss: 1.1693 
2025-08-10 22:52:05,024 - INFO - Epoch: 103/500, Iter: 7/119 -- train_loss: 1.1684 
2025-08-10 22:52:05,330 - INFO - Epoch: 103/500, Iter: 8/119 -- train_loss: 1.1624 
2025-08-10 22:52:05,625 - INFO - Epoch: 103/500, Iter: 9/119 -- train_loss: 1.1475 
2025-08-10 22:52:13,680 - INFO - Epoch: 103/500, Iter: 10/119 -- train_loss: 1.1701 
2025-08-10 22:52:13,939 - INFO - Epoch: 103/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-10 22:52:14,236 - INFO - Epoch: 103/500, Iter: 12/119 -- train_loss: 1.1522 
2025-08-10 22:52:14,521 - INFO - Epoch: 103/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 22:55:39,998 - INFO - Epoch: 104/500, Iter: 1/119 -- train_loss: 1.1828 


[1/119]   1%|           [00:00<?]

2025-08-10 22:55:47,425 - INFO - Epoch: 104/500, Iter: 2/119 -- train_loss: 1.1673 
2025-08-10 22:55:47,742 - INFO - Epoch: 104/500, Iter: 3/119 -- train_loss: 1.1776 
2025-08-10 22:55:48,051 - INFO - Epoch: 104/500, Iter: 4/119 -- train_loss: 1.1865 
2025-08-10 22:55:48,338 - INFO - Epoch: 104/500, Iter: 5/119 -- train_loss: 1.1687 
2025-08-10 22:55:48,630 - INFO - Epoch: 104/500, Iter: 6/119 -- train_loss: 1.1733 
2025-08-10 22:55:48,930 - INFO - Epoch: 104/500, Iter: 7/119 -- train_loss: 1.0790 
2025-08-10 22:55:49,252 - INFO - Epoch: 104/500, Iter: 8/119 -- train_loss: 1.1745 
2025-08-10 22:55:49,546 - INFO - Epoch: 104/500, Iter: 9/119 -- train_loss: 0.9641 
2025-08-10 22:55:55,842 - INFO - Epoch: 104/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-10 22:55:56,160 - INFO - Epoch: 104/500, Iter: 11/119 -- train_loss: 1.1600 
2025-08-10 22:55:56,452 - INFO - Epoch: 104/500, Iter: 12/119 -- train_loss: 1.1379 
2025-08-10 22:55:56,803 - INFO - Epoch: 104/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 22:59:28,046 - INFO - Epoch: 105/500, Iter: 1/119 -- train_loss: 1.1707 


[1/119]   1%|           [00:00<?]

2025-08-10 22:59:28,387 - INFO - Epoch: 105/500, Iter: 2/119 -- train_loss: 1.1668 
2025-08-10 22:59:28,672 - INFO - Epoch: 105/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-10 22:59:28,993 - INFO - Epoch: 105/500, Iter: 4/119 -- train_loss: 1.1550 
2025-08-10 22:59:29,309 - INFO - Epoch: 105/500, Iter: 5/119 -- train_loss: 1.1683 
2025-08-10 22:59:29,597 - INFO - Epoch: 105/500, Iter: 6/119 -- train_loss: 1.0177 
2025-08-10 22:59:29,897 - INFO - Epoch: 105/500, Iter: 7/119 -- train_loss: 1.1372 
2025-08-10 22:59:30,211 - INFO - Epoch: 105/500, Iter: 8/119 -- train_loss: 1.1558 
2025-08-10 22:59:37,930 - INFO - Epoch: 105/500, Iter: 9/119 -- train_loss: 1.1754 
2025-08-10 22:59:38,231 - INFO - Epoch: 105/500, Iter: 10/119 -- train_loss: 1.1700 
2025-08-10 22:59:38,531 - INFO - Epoch: 105/500, Iter: 11/119 -- train_loss: 1.1744 
2025-08-10 22:59:38,852 - INFO - Epoch: 105/500, Iter: 12/119 -- train_loss: 1.1745 
2025-08-10 22:59:39,151 - INFO - Epoch: 105/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:03:12,076 - INFO - Epoch: 106/500, Iter: 1/119 -- train_loss: 1.0207 


[1/119]   1%|           [00:00<?]

2025-08-10 23:03:19,255 - INFO - Epoch: 106/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-10 23:03:19,555 - INFO - Epoch: 106/500, Iter: 3/119 -- train_loss: 1.1708 
2025-08-10 23:03:19,871 - INFO - Epoch: 106/500, Iter: 4/119 -- train_loss: 1.1508 
2025-08-10 23:03:20,204 - INFO - Epoch: 106/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-10 23:03:20,514 - INFO - Epoch: 106/500, Iter: 6/119 -- train_loss: 1.1703 
2025-08-10 23:03:20,813 - INFO - Epoch: 106/500, Iter: 7/119 -- train_loss: 1.1739 
2025-08-10 23:03:21,111 - INFO - Epoch: 106/500, Iter: 8/119 -- train_loss: 1.1329 
2025-08-10 23:03:21,410 - INFO - Epoch: 106/500, Iter: 9/119 -- train_loss: 1.1745 
2025-08-10 23:03:23,891 - INFO - Epoch: 106/500, Iter: 10/119 -- train_loss: 1.1746 
2025-08-10 23:03:24,233 - INFO - Epoch: 106/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-10 23:03:24,532 - INFO - Epoch: 106/500, Iter: 12/119 -- train_loss: 1.1403 
2025-08-10 23:03:24,839 - INFO - Epoch: 106/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:07:02,459 - INFO - Epoch: 107/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-10 23:07:07,201 - INFO - Epoch: 107/500, Iter: 2/119 -- train_loss: 1.1747 
2025-08-10 23:07:14,846 - INFO - Epoch: 107/500, Iter: 3/119 -- train_loss: 1.1758 
2025-08-10 23:07:15,112 - INFO - Epoch: 107/500, Iter: 4/119 -- train_loss: 1.1731 
2025-08-10 23:07:15,396 - INFO - Epoch: 107/500, Iter: 5/119 -- train_loss: 1.1706 
2025-08-10 23:07:15,715 - INFO - Epoch: 107/500, Iter: 6/119 -- train_loss: 1.1643 
2025-08-10 23:07:16,039 - INFO - Epoch: 107/500, Iter: 7/119 -- train_loss: 1.1284 
2025-08-10 23:07:16,351 - INFO - Epoch: 107/500, Iter: 8/119 -- train_loss: 1.1701 
2025-08-10 23:07:16,625 - INFO - Epoch: 107/500, Iter: 9/119 -- train_loss: 1.1452 
2025-08-10 23:07:16,962 - INFO - Epoch: 107/500, Iter: 10/119 -- train_loss: 1.1763 
2025-08-10 23:07:19,152 - INFO - Epoch: 107/500, Iter: 11/119 -- train_loss: 1.1425 
2025-08-10 23:07:19,431 - INFO - Epoch: 107/500, Iter: 12/119 -- train_loss: 1.1778 
2025-08-10 23:07:19,738 - INFO - Epoch: 107/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:10:53,560 - INFO - Epoch: 108/500, Iter: 1/119 -- train_loss: 1.1744 


[1/119]   1%|           [00:00<?]

2025-08-10 23:10:53,857 - INFO - Epoch: 108/500, Iter: 2/119 -- train_loss: 1.1746 
2025-08-10 23:10:54,175 - INFO - Epoch: 108/500, Iter: 3/119 -- train_loss: 1.1612 
2025-08-10 23:10:54,481 - INFO - Epoch: 108/500, Iter: 4/119 -- train_loss: 1.1547 
2025-08-10 23:10:55,723 - INFO - Epoch: 108/500, Iter: 5/119 -- train_loss: 1.1761 
2025-08-10 23:10:56,024 - INFO - Epoch: 108/500, Iter: 6/119 -- train_loss: 1.1759 
2025-08-10 23:10:56,301 - INFO - Epoch: 108/500, Iter: 7/119 -- train_loss: 1.1749 
2025-08-10 23:10:56,592 - INFO - Epoch: 108/500, Iter: 8/119 -- train_loss: 1.1572 
2025-08-10 23:11:02,519 - INFO - Epoch: 108/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-10 23:11:05,072 - INFO - Epoch: 108/500, Iter: 10/119 -- train_loss: 1.1547 
2025-08-10 23:11:05,364 - INFO - Epoch: 108/500, Iter: 11/119 -- train_loss: 1.1643 
2025-08-10 23:11:05,697 - INFO - Epoch: 108/500, Iter: 12/119 -- train_loss: 1.1745 
2025-08-10 23:11:05,990 - INFO - Epoch: 108/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:14:35,064 - INFO - Epoch: 109/500, Iter: 1/119 -- train_loss: 1.1762 


[1/119]   1%|           [00:00<?]

2025-08-10 23:14:41,889 - INFO - Epoch: 109/500, Iter: 2/119 -- train_loss: 1.1749 
2025-08-10 23:14:42,154 - INFO - Epoch: 109/500, Iter: 3/119 -- train_loss: 1.1469 
2025-08-10 23:14:42,464 - INFO - Epoch: 109/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-10 23:14:42,783 - INFO - Epoch: 109/500, Iter: 5/119 -- train_loss: 1.1755 
2025-08-10 23:14:43,111 - INFO - Epoch: 109/500, Iter: 6/119 -- train_loss: 1.1743 
2025-08-10 23:14:43,411 - INFO - Epoch: 109/500, Iter: 7/119 -- train_loss: 1.1734 
2025-08-10 23:14:43,751 - INFO - Epoch: 109/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-10 23:14:44,040 - INFO - Epoch: 109/500, Iter: 9/119 -- train_loss: 1.1630 
2025-08-10 23:14:50,878 - INFO - Epoch: 109/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-10 23:14:51,186 - INFO - Epoch: 109/500, Iter: 11/119 -- train_loss: 1.1443 
2025-08-10 23:14:51,515 - INFO - Epoch: 109/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-10 23:14:51,838 - INFO - Epoch: 109/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:18:38,635 - INFO - Epoch: 110/500, Iter: 1/119 -- train_loss: 1.1724 


[1/119]   1%|           [00:00<?]

2025-08-10 23:18:38,987 - INFO - Epoch: 110/500, Iter: 2/119 -- train_loss: 1.1454 
2025-08-10 23:18:39,296 - INFO - Epoch: 110/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-10 23:18:39,605 - INFO - Epoch: 110/500, Iter: 4/119 -- train_loss: 1.1736 
2025-08-10 23:18:39,901 - INFO - Epoch: 110/500, Iter: 5/119 -- train_loss: 1.1669 
2025-08-10 23:18:40,260 - INFO - Epoch: 110/500, Iter: 6/119 -- train_loss: 1.1848 
2025-08-10 23:18:41,963 - INFO - Epoch: 110/500, Iter: 7/119 -- train_loss: 1.1823 
2025-08-10 23:18:42,275 - INFO - Epoch: 110/500, Iter: 8/119 -- train_loss: 1.1536 
2025-08-10 23:18:45,518 - INFO - Epoch: 110/500, Iter: 9/119 -- train_loss: 1.1794 
2025-08-10 23:18:49,789 - INFO - Epoch: 110/500, Iter: 10/119 -- train_loss: 1.1757 
2025-08-10 23:18:50,068 - INFO - Epoch: 110/500, Iter: 11/119 -- train_loss: 1.1778 
2025-08-10 23:18:50,739 - INFO - Epoch: 110/500, Iter: 12/119 -- train_loss: 1.1496 
2025-08-10 23:18:51,054 - INFO - Epoch: 110/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:22:33,800 - INFO - Epoch: 111/500, Iter: 1/119 -- train_loss: 1.1132 


[1/119]   1%|           [00:00<?]

2025-08-10 23:22:34,141 - INFO - Epoch: 111/500, Iter: 2/119 -- train_loss: 1.1750 
2025-08-10 23:22:34,426 - INFO - Epoch: 111/500, Iter: 3/119 -- train_loss: 1.1747 
2025-08-10 23:22:34,728 - INFO - Epoch: 111/500, Iter: 4/119 -- train_loss: 1.1672 
2025-08-10 23:22:35,048 - INFO - Epoch: 111/500, Iter: 5/119 -- train_loss: 0.9755 
2025-08-10 23:22:35,361 - INFO - Epoch: 111/500, Iter: 6/119 -- train_loss: 1.1717 
2025-08-10 23:22:35,622 - INFO - Epoch: 111/500, Iter: 7/119 -- train_loss: 1.1235 
2025-08-10 23:22:37,380 - INFO - Epoch: 111/500, Iter: 8/119 -- train_loss: 1.1908 
2025-08-10 23:22:41,525 - INFO - Epoch: 111/500, Iter: 9/119 -- train_loss: 1.1677 
2025-08-10 23:22:41,815 - INFO - Epoch: 111/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-10 23:22:42,129 - INFO - Epoch: 111/500, Iter: 11/119 -- train_loss: 1.1721 
2025-08-10 23:22:42,456 - INFO - Epoch: 111/500, Iter: 12/119 -- train_loss: 1.1683 
2025-08-10 23:22:42,987 - INFO - Epoch: 111/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:26:27,886 - INFO - Epoch: 112/500, Iter: 1/119 -- train_loss: 1.1713 


[1/119]   1%|           [00:00<?]

2025-08-10 23:26:28,134 - INFO - Epoch: 112/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-10 23:26:28,424 - INFO - Epoch: 112/500, Iter: 3/119 -- train_loss: 1.1661 
2025-08-10 23:26:28,675 - INFO - Epoch: 112/500, Iter: 4/119 -- train_loss: 1.1751 
2025-08-10 23:26:28,990 - INFO - Epoch: 112/500, Iter: 5/119 -- train_loss: 1.1766 
2025-08-10 23:26:29,277 - INFO - Epoch: 112/500, Iter: 6/119 -- train_loss: 1.1705 
2025-08-10 23:26:29,576 - INFO - Epoch: 112/500, Iter: 7/119 -- train_loss: 1.1747 
2025-08-10 23:26:29,875 - INFO - Epoch: 112/500, Iter: 8/119 -- train_loss: 1.1447 
2025-08-10 23:26:32,033 - INFO - Epoch: 112/500, Iter: 9/119 -- train_loss: 1.1725 
2025-08-10 23:26:32,401 - INFO - Epoch: 112/500, Iter: 10/119 -- train_loss: 1.1747 
2025-08-10 23:26:32,751 - INFO - Epoch: 112/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-10 23:26:33,053 - INFO - Epoch: 112/500, Iter: 12/119 -- train_loss: 1.1758 
2025-08-10 23:26:33,339 - INFO - Epoch: 112/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:30:16,221 - INFO - Epoch: 113/500, Iter: 1/119 -- train_loss: 1.1680 


[1/119]   1%|           [00:00<?]

2025-08-10 23:30:16,454 - INFO - Epoch: 113/500, Iter: 2/119 -- train_loss: 1.0732 
2025-08-10 23:30:16,731 - INFO - Epoch: 113/500, Iter: 3/119 -- train_loss: 1.1340 
2025-08-10 23:30:17,001 - INFO - Epoch: 113/500, Iter: 4/119 -- train_loss: 1.1761 
2025-08-10 23:30:17,283 - INFO - Epoch: 113/500, Iter: 5/119 -- train_loss: 1.1704 
2025-08-10 23:30:17,590 - INFO - Epoch: 113/500, Iter: 6/119 -- train_loss: 1.1757 
2025-08-10 23:30:17,908 - INFO - Epoch: 113/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-10 23:30:18,212 - INFO - Epoch: 113/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-10 23:30:21,349 - INFO - Epoch: 113/500, Iter: 9/119 -- train_loss: 1.1746 
2025-08-10 23:30:21,613 - INFO - Epoch: 113/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-10 23:30:23,898 - INFO - Epoch: 113/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-10 23:30:24,205 - INFO - Epoch: 113/500, Iter: 12/119 -- train_loss: 1.1745 
2025-08-10 23:30:24,474 - INFO - Epoch: 113/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:34:06,380 - INFO - Epoch: 114/500, Iter: 1/119 -- train_loss: 1.1730 


[1/119]   1%|           [00:00<?]

2025-08-10 23:34:06,690 - INFO - Epoch: 114/500, Iter: 2/119 -- train_loss: 1.0102 
2025-08-10 23:34:06,996 - INFO - Epoch: 114/500, Iter: 3/119 -- train_loss: 1.1769 
2025-08-10 23:34:09,004 - INFO - Epoch: 114/500, Iter: 4/119 -- train_loss: 1.1775 
2025-08-10 23:34:09,318 - INFO - Epoch: 114/500, Iter: 5/119 -- train_loss: 1.1757 
2025-08-10 23:34:09,601 - INFO - Epoch: 114/500, Iter: 6/119 -- train_loss: 1.1601 
2025-08-10 23:34:09,897 - INFO - Epoch: 114/500, Iter: 7/119 -- train_loss: 1.1765 
2025-08-10 23:34:10,220 - INFO - Epoch: 114/500, Iter: 8/119 -- train_loss: 1.1282 
2025-08-10 23:34:11,327 - INFO - Epoch: 114/500, Iter: 9/119 -- train_loss: 1.1095 
2025-08-10 23:34:11,641 - INFO - Epoch: 114/500, Iter: 10/119 -- train_loss: 1.1698 
2025-08-10 23:34:11,948 - INFO - Epoch: 114/500, Iter: 11/119 -- train_loss: 1.1600 
2025-08-10 23:34:14,695 - INFO - Epoch: 114/500, Iter: 12/119 -- train_loss: 1.1720 
2025-08-10 23:34:14,986 - INFO - Epoch: 114/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:37:48,250 - INFO - Epoch: 115/500, Iter: 1/119 -- train_loss: 1.1634 


[1/119]   1%|           [00:00<?]

2025-08-10 23:37:55,054 - INFO - Epoch: 115/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-10 23:37:55,370 - INFO - Epoch: 115/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-10 23:37:55,659 - INFO - Epoch: 115/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-10 23:37:55,956 - INFO - Epoch: 115/500, Iter: 5/119 -- train_loss: 1.1672 
2025-08-10 23:37:56,256 - INFO - Epoch: 115/500, Iter: 6/119 -- train_loss: 1.1480 
2025-08-10 23:37:56,539 - INFO - Epoch: 115/500, Iter: 7/119 -- train_loss: 1.1451 
2025-08-10 23:37:56,843 - INFO - Epoch: 115/500, Iter: 8/119 -- train_loss: 1.1714 
2025-08-10 23:37:57,140 - INFO - Epoch: 115/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-10 23:37:59,248 - INFO - Epoch: 115/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-10 23:37:59,594 - INFO - Epoch: 115/500, Iter: 11/119 -- train_loss: 1.0817 
2025-08-10 23:38:00,523 - INFO - Epoch: 115/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-10 23:38:00,848 - INFO - Epoch: 115/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:41:36,614 - INFO - Epoch: 116/500, Iter: 1/119 -- train_loss: 1.1746 


[1/119]   1%|           [00:00<?]

2025-08-10 23:41:36,938 - INFO - Epoch: 116/500, Iter: 2/119 -- train_loss: 1.1678 
2025-08-10 23:41:37,245 - INFO - Epoch: 116/500, Iter: 3/119 -- train_loss: 1.1661 
2025-08-10 23:41:37,544 - INFO - Epoch: 116/500, Iter: 4/119 -- train_loss: 1.1584 
2025-08-10 23:41:37,878 - INFO - Epoch: 116/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-10 23:41:38,165 - INFO - Epoch: 116/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-10 23:41:38,491 - INFO - Epoch: 116/500, Iter: 7/119 -- train_loss: 1.1664 
2025-08-10 23:41:38,794 - INFO - Epoch: 116/500, Iter: 8/119 -- train_loss: 1.1634 
2025-08-10 23:41:40,496 - INFO - Epoch: 116/500, Iter: 9/119 -- train_loss: 1.1449 
2025-08-10 23:41:40,838 - INFO - Epoch: 116/500, Iter: 10/119 -- train_loss: 1.1784 
2025-08-10 23:41:47,682 - INFO - Epoch: 116/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-10 23:41:47,957 - INFO - Epoch: 116/500, Iter: 12/119 -- train_loss: 1.0601 
2025-08-10 23:41:48,206 - INFO - Epoch: 116/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:45:22,794 - INFO - Epoch: 117/500, Iter: 1/119 -- train_loss: 1.1658 


[1/119]   1%|           [00:00<?]

2025-08-10 23:45:24,274 - INFO - Epoch: 117/500, Iter: 2/119 -- train_loss: 1.1519 
2025-08-10 23:45:24,594 - INFO - Epoch: 117/500, Iter: 3/119 -- train_loss: 1.1757 
2025-08-10 23:45:24,865 - INFO - Epoch: 117/500, Iter: 4/119 -- train_loss: 1.1753 
2025-08-10 23:45:25,173 - INFO - Epoch: 117/500, Iter: 5/119 -- train_loss: 1.1578 
2025-08-10 23:45:25,506 - INFO - Epoch: 117/500, Iter: 6/119 -- train_loss: 1.1503 
2025-08-10 23:45:26,337 - INFO - Epoch: 117/500, Iter: 7/119 -- train_loss: 1.1822 
2025-08-10 23:45:26,617 - INFO - Epoch: 117/500, Iter: 8/119 -- train_loss: 1.1724 
2025-08-10 23:45:31,555 - INFO - Epoch: 117/500, Iter: 9/119 -- train_loss: 1.1546 
2025-08-10 23:45:31,878 - INFO - Epoch: 117/500, Iter: 10/119 -- train_loss: 1.1747 
2025-08-10 23:45:32,170 - INFO - Epoch: 117/500, Iter: 11/119 -- train_loss: 1.0184 
2025-08-10 23:45:33,359 - INFO - Epoch: 117/500, Iter: 12/119 -- train_loss: 1.1584 
2025-08-10 23:45:33,691 - INFO - Epoch: 117/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:49:14,907 - INFO - Epoch: 118/500, Iter: 1/119 -- train_loss: 1.1557 


[1/119]   1%|           [00:00<?]

2025-08-10 23:49:15,224 - INFO - Epoch: 118/500, Iter: 2/119 -- train_loss: 1.1718 
2025-08-10 23:49:15,517 - INFO - Epoch: 118/500, Iter: 3/119 -- train_loss: 1.1670 
2025-08-10 23:49:19,398 - INFO - Epoch: 118/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-10 23:49:19,692 - INFO - Epoch: 118/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-10 23:49:20,020 - INFO - Epoch: 118/500, Iter: 6/119 -- train_loss: 1.1579 
2025-08-10 23:49:20,320 - INFO - Epoch: 118/500, Iter: 7/119 -- train_loss: 1.1587 
2025-08-10 23:49:20,629 - INFO - Epoch: 118/500, Iter: 8/119 -- train_loss: 1.1318 
2025-08-10 23:49:23,001 - INFO - Epoch: 118/500, Iter: 9/119 -- train_loss: 1.1763 
2025-08-10 23:49:23,329 - INFO - Epoch: 118/500, Iter: 10/119 -- train_loss: 1.1759 
2025-08-10 23:49:23,632 - INFO - Epoch: 118/500, Iter: 11/119 -- train_loss: 1.1520 
2025-08-10 23:49:25,454 - INFO - Epoch: 118/500, Iter: 12/119 -- train_loss: 1.1757 
2025-08-10 23:49:25,760 - INFO - Epoch: 118/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:52:52,043 - INFO - Epoch: 119/500, Iter: 1/119 -- train_loss: 1.1473 


[1/119]   1%|           [00:00<?]

2025-08-10 23:52:52,537 - INFO - Epoch: 119/500, Iter: 2/119 -- train_loss: 1.1656 
2025-08-10 23:52:52,864 - INFO - Epoch: 119/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-10 23:52:53,320 - INFO - Epoch: 119/500, Iter: 4/119 -- train_loss: 1.1627 
2025-08-10 23:52:54,635 - INFO - Epoch: 119/500, Iter: 5/119 -- train_loss: 1.1615 
2025-08-10 23:52:54,973 - INFO - Epoch: 119/500, Iter: 6/119 -- train_loss: 1.0727 
2025-08-10 23:52:56,513 - INFO - Epoch: 119/500, Iter: 7/119 -- train_loss: 1.1467 
2025-08-10 23:52:56,825 - INFO - Epoch: 119/500, Iter: 8/119 -- train_loss: 1.1685 
2025-08-10 23:53:02,834 - INFO - Epoch: 119/500, Iter: 9/119 -- train_loss: 1.1745 
2025-08-10 23:53:03,134 - INFO - Epoch: 119/500, Iter: 10/119 -- train_loss: 1.1555 
2025-08-10 23:53:03,427 - INFO - Epoch: 119/500, Iter: 11/119 -- train_loss: 1.1705 
2025-08-10 23:53:03,727 - INFO - Epoch: 119/500, Iter: 12/119 -- train_loss: 1.1072 
2025-08-10 23:53:04,048 - INFO - Epoch: 119/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-10 23:57:00,978 - INFO - Epoch: 120/500, Iter: 1/119 -- train_loss: 1.1715 


[1/119]   1%|           [00:00<?]

2025-08-10 23:57:01,281 - INFO - Epoch: 120/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-10 23:57:01,568 - INFO - Epoch: 120/500, Iter: 3/119 -- train_loss: 1.1747 
2025-08-10 23:57:01,861 - INFO - Epoch: 120/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-10 23:57:02,178 - INFO - Epoch: 120/500, Iter: 5/119 -- train_loss: 1.1476 
2025-08-10 23:57:02,481 - INFO - Epoch: 120/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-10 23:57:02,810 - INFO - Epoch: 120/500, Iter: 7/119 -- train_loss: 1.1746 
2025-08-10 23:57:03,114 - INFO - Epoch: 120/500, Iter: 8/119 -- train_loss: 1.1713 
2025-08-10 23:57:06,672 - INFO - Epoch: 120/500, Iter: 9/119 -- train_loss: 1.1232 
2025-08-10 23:57:06,926 - INFO - Epoch: 120/500, Iter: 10/119 -- train_loss: 1.1745 
2025-08-10 23:57:07,233 - INFO - Epoch: 120/500, Iter: 11/119 -- train_loss: 1.1350 
2025-08-10 23:57:07,551 - INFO - Epoch: 120/500, Iter: 12/119 -- train_loss: 1.1708 
2025-08-10 23:57:07,863 - INFO - Epoch: 120/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:00:52,873 - INFO - Epoch: 121/500, Iter: 1/119 -- train_loss: 1.1725 


[1/119]   1%|           [00:00<?]

2025-08-11 00:00:53,174 - INFO - Epoch: 121/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 00:00:53,457 - INFO - Epoch: 121/500, Iter: 3/119 -- train_loss: 1.1548 
2025-08-11 00:00:53,773 - INFO - Epoch: 121/500, Iter: 4/119 -- train_loss: 1.0554 
2025-08-11 00:00:55,433 - INFO - Epoch: 121/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 00:00:55,727 - INFO - Epoch: 121/500, Iter: 6/119 -- train_loss: 1.1681 
2025-08-11 00:00:56,021 - INFO - Epoch: 121/500, Iter: 7/119 -- train_loss: 1.1740 
2025-08-11 00:00:56,289 - INFO - Epoch: 121/500, Iter: 8/119 -- train_loss: 1.1649 
2025-08-11 00:00:58,055 - INFO - Epoch: 121/500, Iter: 9/119 -- train_loss: 1.1793 
2025-08-11 00:00:58,391 - INFO - Epoch: 121/500, Iter: 10/119 -- train_loss: 1.1745 
2025-08-11 00:00:58,713 - INFO - Epoch: 121/500, Iter: 11/119 -- train_loss: 1.1676 
2025-08-11 00:00:59,033 - INFO - Epoch: 121/500, Iter: 12/119 -- train_loss: 1.1748 
2025-08-11 00:01:01,670 - INFO - Epoch: 121/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:04:43,391 - INFO - Epoch: 122/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 00:04:44,113 - INFO - Epoch: 122/500, Iter: 2/119 -- train_loss: 1.1744 
2025-08-11 00:04:44,411 - INFO - Epoch: 122/500, Iter: 3/119 -- train_loss: 1.1762 
2025-08-11 00:04:48,377 - INFO - Epoch: 122/500, Iter: 4/119 -- train_loss: 1.1334 
2025-08-11 00:04:48,684 - INFO - Epoch: 122/500, Iter: 5/119 -- train_loss: 1.1186 
2025-08-11 00:04:48,998 - INFO - Epoch: 122/500, Iter: 6/119 -- train_loss: 1.1745 
2025-08-11 00:04:49,332 - INFO - Epoch: 122/500, Iter: 7/119 -- train_loss: 1.1575 
2025-08-11 00:04:49,660 - INFO - Epoch: 122/500, Iter: 8/119 -- train_loss: 1.1024 
2025-08-11 00:04:54,797 - INFO - Epoch: 122/500, Iter: 9/119 -- train_loss: 1.1053 
2025-08-11 00:04:56,343 - INFO - Epoch: 122/500, Iter: 10/119 -- train_loss: 1.0236 
2025-08-11 00:04:56,662 - INFO - Epoch: 122/500, Iter: 11/119 -- train_loss: 1.0636 
2025-08-11 00:04:56,980 - INFO - Epoch: 122/500, Iter: 12/119 -- train_loss: 1.1715 
2025-08-11 00:04:57,292 - INFO - Epoch: 122/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:08:35,506 - INFO - Epoch: 123/500, Iter: 1/119 -- train_loss: 1.1754 


[1/119]   1%|           [00:00<?]

2025-08-11 00:08:35,820 - INFO - Epoch: 123/500, Iter: 2/119 -- train_loss: 1.1732 
2025-08-11 00:08:36,120 - INFO - Epoch: 123/500, Iter: 3/119 -- train_loss: 1.1631 
2025-08-11 00:08:36,382 - INFO - Epoch: 123/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 00:08:36,657 - INFO - Epoch: 123/500, Iter: 5/119 -- train_loss: 1.1702 
2025-08-11 00:08:38,284 - INFO - Epoch: 123/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 00:08:38,632 - INFO - Epoch: 123/500, Iter: 7/119 -- train_loss: 1.1340 
2025-08-11 00:08:38,956 - INFO - Epoch: 123/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 00:08:43,534 - INFO - Epoch: 123/500, Iter: 9/119 -- train_loss: 1.1746 
2025-08-11 00:08:43,853 - INFO - Epoch: 123/500, Iter: 10/119 -- train_loss: 1.1344 
2025-08-11 00:08:44,145 - INFO - Epoch: 123/500, Iter: 11/119 -- train_loss: 1.1333 
2025-08-11 00:08:44,412 - INFO - Epoch: 123/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 00:08:44,709 - INFO - Epoch: 123/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:12:24,906 - INFO - Epoch: 124/500, Iter: 1/119 -- train_loss: 1.1463 


[1/119]   1%|           [00:00<?]

2025-08-11 00:12:31,656 - INFO - Epoch: 124/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 00:12:31,948 - INFO - Epoch: 124/500, Iter: 3/119 -- train_loss: 1.1744 
2025-08-11 00:12:32,224 - INFO - Epoch: 124/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 00:12:32,535 - INFO - Epoch: 124/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 00:12:32,849 - INFO - Epoch: 124/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 00:12:33,130 - INFO - Epoch: 124/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 00:12:33,444 - INFO - Epoch: 124/500, Iter: 8/119 -- train_loss: 1.1721 
2025-08-11 00:12:33,742 - INFO - Epoch: 124/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 00:12:41,497 - INFO - Epoch: 124/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 00:12:41,774 - INFO - Epoch: 124/500, Iter: 11/119 -- train_loss: 1.1607 
2025-08-11 00:12:42,075 - INFO - Epoch: 124/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 00:12:42,347 - INFO - Epoch: 124/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:16:05,003 - INFO - Epoch: 125/500, Iter: 1/119 -- train_loss: 1.1608 


[1/119]   1%|           [00:00<?]

2025-08-11 00:16:11,028 - INFO - Epoch: 125/500, Iter: 2/119 -- train_loss: 1.1510 
2025-08-11 00:16:11,327 - INFO - Epoch: 125/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 00:16:11,623 - INFO - Epoch: 125/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 00:16:11,907 - INFO - Epoch: 125/500, Iter: 5/119 -- train_loss: 1.1606 
2025-08-11 00:16:12,247 - INFO - Epoch: 125/500, Iter: 6/119 -- train_loss: 1.1751 
2025-08-11 00:16:12,563 - INFO - Epoch: 125/500, Iter: 7/119 -- train_loss: 1.1437 
2025-08-11 00:16:12,892 - INFO - Epoch: 125/500, Iter: 8/119 -- train_loss: 1.1570 
2025-08-11 00:16:13,234 - INFO - Epoch: 125/500, Iter: 9/119 -- train_loss: 1.1608 
2025-08-11 00:16:18,166 - INFO - Epoch: 125/500, Iter: 10/119 -- train_loss: 1.0801 
2025-08-11 00:16:18,440 - INFO - Epoch: 125/500, Iter: 11/119 -- train_loss: 1.1274 
2025-08-11 00:16:19,602 - INFO - Epoch: 125/500, Iter: 12/119 -- train_loss: 1.1660 
2025-08-11 00:16:19,903 - INFO - Epoch: 125/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:19:51,322 - INFO - Epoch: 126/500, Iter: 1/119 -- train_loss: 1.1269 


[1/119]   1%|           [00:00<?]

2025-08-11 00:19:54,946 - INFO - Epoch: 126/500, Iter: 2/119 -- train_loss: 1.1542 
2025-08-11 00:19:55,262 - INFO - Epoch: 126/500, Iter: 3/119 -- train_loss: 1.1498 
2025-08-11 00:19:57,340 - INFO - Epoch: 126/500, Iter: 4/119 -- train_loss: 1.1110 
2025-08-11 00:19:57,654 - INFO - Epoch: 126/500, Iter: 5/119 -- train_loss: 1.1730 
2025-08-11 00:19:57,931 - INFO - Epoch: 126/500, Iter: 6/119 -- train_loss: 1.1738 
2025-08-11 00:19:58,221 - INFO - Epoch: 126/500, Iter: 7/119 -- train_loss: 1.1717 
2025-08-11 00:19:58,487 - INFO - Epoch: 126/500, Iter: 8/119 -- train_loss: 1.1259 
2025-08-11 00:20:07,176 - INFO - Epoch: 126/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 00:20:07,421 - INFO - Epoch: 126/500, Iter: 10/119 -- train_loss: 1.1674 
2025-08-11 00:20:07,741 - INFO - Epoch: 126/500, Iter: 11/119 -- train_loss: 1.1481 
2025-08-11 00:20:08,065 - INFO - Epoch: 126/500, Iter: 12/119 -- train_loss: 1.1396 
2025-08-11 00:20:08,370 - INFO - Epoch: 126/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:23:47,046 - INFO - Epoch: 127/500, Iter: 1/119 -- train_loss: 1.1739 


[1/119]   1%|           [00:00<?]

2025-08-11 00:23:47,299 - INFO - Epoch: 127/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 00:23:48,080 - INFO - Epoch: 127/500, Iter: 3/119 -- train_loss: 1.1708 
2025-08-11 00:23:49,547 - INFO - Epoch: 127/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 00:23:50,095 - INFO - Epoch: 127/500, Iter: 5/119 -- train_loss: 1.1659 
2025-08-11 00:23:50,386 - INFO - Epoch: 127/500, Iter: 6/119 -- train_loss: 1.1547 
2025-08-11 00:23:50,671 - INFO - Epoch: 127/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 00:23:50,951 - INFO - Epoch: 127/500, Iter: 8/119 -- train_loss: 1.1526 
2025-08-11 00:23:52,542 - INFO - Epoch: 127/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 00:23:52,816 - INFO - Epoch: 127/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 00:23:56,180 - INFO - Epoch: 127/500, Iter: 11/119 -- train_loss: 1.1746 
2025-08-11 00:24:01,233 - INFO - Epoch: 127/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 00:24:02,185 - INFO - Epoch: 127/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:27:34,851 - INFO - Epoch: 128/500, Iter: 1/119 -- train_loss: 1.1750 


[1/119]   1%|           [00:00<?]

2025-08-11 00:27:35,161 - INFO - Epoch: 128/500, Iter: 2/119 -- train_loss: 1.1744 
2025-08-11 00:27:39,592 - INFO - Epoch: 128/500, Iter: 3/119 -- train_loss: 1.1750 
2025-08-11 00:27:39,870 - INFO - Epoch: 128/500, Iter: 4/119 -- train_loss: 1.1677 
2025-08-11 00:27:40,176 - INFO - Epoch: 128/500, Iter: 5/119 -- train_loss: 1.1748 
2025-08-11 00:27:40,477 - INFO - Epoch: 128/500, Iter: 6/119 -- train_loss: 1.1154 
2025-08-11 00:27:40,763 - INFO - Epoch: 128/500, Iter: 7/119 -- train_loss: 1.1799 
2025-08-11 00:27:44,137 - INFO - Epoch: 128/500, Iter: 8/119 -- train_loss: 1.1558 
2025-08-11 00:27:44,434 - INFO - Epoch: 128/500, Iter: 9/119 -- train_loss: 1.1621 
2025-08-11 00:27:44,717 - INFO - Epoch: 128/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 00:27:47,444 - INFO - Epoch: 128/500, Iter: 11/119 -- train_loss: 1.1440 
2025-08-11 00:27:47,727 - INFO - Epoch: 128/500, Iter: 12/119 -- train_loss: 1.1693 
2025-08-11 00:27:48,027 - INFO - Epoch: 128/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:31:30,574 - INFO - Epoch: 129/500, Iter: 1/119 -- train_loss: 1.1602 


[1/119]   1%|           [00:00<?]

2025-08-11 00:31:30,889 - INFO - Epoch: 129/500, Iter: 2/119 -- train_loss: 1.1755 
2025-08-11 00:31:31,193 - INFO - Epoch: 129/500, Iter: 3/119 -- train_loss: 1.1398 
2025-08-11 00:31:31,511 - INFO - Epoch: 129/500, Iter: 4/119 -- train_loss: 1.1762 
2025-08-11 00:31:31,828 - INFO - Epoch: 129/500, Iter: 5/119 -- train_loss: 1.1768 
2025-08-11 00:31:32,142 - INFO - Epoch: 129/500, Iter: 6/119 -- train_loss: 1.1749 
2025-08-11 00:31:32,437 - INFO - Epoch: 129/500, Iter: 7/119 -- train_loss: 1.1408 
2025-08-11 00:31:32,776 - INFO - Epoch: 129/500, Iter: 8/119 -- train_loss: 1.1544 
2025-08-11 00:31:40,137 - INFO - Epoch: 129/500, Iter: 9/119 -- train_loss: 1.1764 
2025-08-11 00:31:40,462 - INFO - Epoch: 129/500, Iter: 10/119 -- train_loss: 1.1755 
2025-08-11 00:31:40,755 - INFO - Epoch: 129/500, Iter: 11/119 -- train_loss: 1.0030 
2025-08-11 00:31:41,040 - INFO - Epoch: 129/500, Iter: 12/119 -- train_loss: 1.1785 
2025-08-11 00:31:41,362 - INFO - Epoch: 129/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:35:06,693 - INFO - Epoch: 130/500, Iter: 1/119 -- train_loss: 1.1623 


[1/119]   1%|           [00:00<?]

2025-08-11 00:35:07,035 - INFO - Epoch: 130/500, Iter: 2/119 -- train_loss: 1.1339 
2025-08-11 00:35:09,584 - INFO - Epoch: 130/500, Iter: 3/119 -- train_loss: 1.1700 
2025-08-11 00:35:09,844 - INFO - Epoch: 130/500, Iter: 4/119 -- train_loss: 1.1745 
2025-08-11 00:35:10,146 - INFO - Epoch: 130/500, Iter: 5/119 -- train_loss: 1.1657 
2025-08-11 00:35:10,451 - INFO - Epoch: 130/500, Iter: 6/119 -- train_loss: 1.0769 
2025-08-11 00:35:10,751 - INFO - Epoch: 130/500, Iter: 7/119 -- train_loss: 1.1755 
2025-08-11 00:35:11,084 - INFO - Epoch: 130/500, Iter: 8/119 -- train_loss: 1.1642 
2025-08-11 00:35:12,351 - INFO - Epoch: 130/500, Iter: 9/119 -- train_loss: 1.1198 
2025-08-11 00:35:13,501 - INFO - Epoch: 130/500, Iter: 10/119 -- train_loss: 1.1681 
2025-08-11 00:35:14,508 - INFO - Epoch: 130/500, Iter: 11/119 -- train_loss: 1.1759 
2025-08-11 00:35:14,818 - INFO - Epoch: 130/500, Iter: 12/119 -- train_loss: 1.1763 
2025-08-11 00:35:15,101 - INFO - Epoch: 130/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:38:45,614 - INFO - Epoch: 131/500, Iter: 1/119 -- train_loss: 1.1171 


[1/119]   1%|           [00:00<?]

2025-08-11 00:38:49,180 - INFO - Epoch: 131/500, Iter: 2/119 -- train_loss: 1.1745 
2025-08-11 00:38:49,488 - INFO - Epoch: 131/500, Iter: 3/119 -- train_loss: 1.1760 
2025-08-11 00:38:56,396 - INFO - Epoch: 131/500, Iter: 4/119 -- train_loss: 1.1243 
2025-08-11 00:38:56,661 - INFO - Epoch: 131/500, Iter: 5/119 -- train_loss: 1.1823 
2025-08-11 00:38:56,964 - INFO - Epoch: 131/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 00:38:57,214 - INFO - Epoch: 131/500, Iter: 7/119 -- train_loss: 1.1756 
2025-08-11 00:38:57,490 - INFO - Epoch: 131/500, Iter: 8/119 -- train_loss: 1.1645 
2025-08-11 00:38:57,779 - INFO - Epoch: 131/500, Iter: 9/119 -- train_loss: 1.1754 
2025-08-11 00:38:58,087 - INFO - Epoch: 131/500, Iter: 10/119 -- train_loss: 1.1576 
2025-08-11 00:38:58,975 - INFO - Epoch: 131/500, Iter: 11/119 -- train_loss: 1.1383 
2025-08-11 00:39:00,725 - INFO - Epoch: 131/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 00:39:01,073 - INFO - Epoch: 131/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:42:30,369 - INFO - Epoch: 132/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 00:42:31,917 - INFO - Epoch: 132/500, Iter: 2/119 -- train_loss: 1.1092 
2025-08-11 00:42:32,243 - INFO - Epoch: 132/500, Iter: 3/119 -- train_loss: 1.1750 
2025-08-11 00:42:36,038 - INFO - Epoch: 132/500, Iter: 4/119 -- train_loss: 1.1753 
2025-08-11 00:42:36,380 - INFO - Epoch: 132/500, Iter: 5/119 -- train_loss: 1.1430 
2025-08-11 00:42:36,700 - INFO - Epoch: 132/500, Iter: 6/119 -- train_loss: 1.1674 
2025-08-11 00:42:37,024 - INFO - Epoch: 132/500, Iter: 7/119 -- train_loss: 1.1745 
2025-08-11 00:42:37,346 - INFO - Epoch: 132/500, Iter: 8/119 -- train_loss: 1.1717 
2025-08-11 00:42:37,677 - INFO - Epoch: 132/500, Iter: 9/119 -- train_loss: 1.1745 
2025-08-11 00:42:46,157 - INFO - Epoch: 132/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 00:42:46,444 - INFO - Epoch: 132/500, Iter: 11/119 -- train_loss: 1.1744 
2025-08-11 00:42:46,716 - INFO - Epoch: 132/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 00:42:46,994 - INFO - Epoch: 132/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:46:06,914 - INFO - Epoch: 133/500, Iter: 1/119 -- train_loss: 1.1785 


[1/119]   1%|           [00:00<?]

2025-08-11 00:46:07,194 - INFO - Epoch: 133/500, Iter: 2/119 -- train_loss: 1.1252 
2025-08-11 00:46:07,445 - INFO - Epoch: 133/500, Iter: 3/119 -- train_loss: 1.1758 
2025-08-11 00:46:07,725 - INFO - Epoch: 133/500, Iter: 4/119 -- train_loss: 1.1550 
2025-08-11 00:46:08,042 - INFO - Epoch: 133/500, Iter: 5/119 -- train_loss: 1.1761 
2025-08-11 00:46:08,357 - INFO - Epoch: 133/500, Iter: 6/119 -- train_loss: 1.1577 
2025-08-11 00:46:08,641 - INFO - Epoch: 133/500, Iter: 7/119 -- train_loss: 1.1784 
2025-08-11 00:46:08,918 - INFO - Epoch: 133/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 00:46:10,684 - INFO - Epoch: 133/500, Iter: 9/119 -- train_loss: 1.1728 
2025-08-11 00:46:11,384 - INFO - Epoch: 133/500, Iter: 10/119 -- train_loss: 1.1544 
2025-08-11 00:46:11,660 - INFO - Epoch: 133/500, Iter: 11/119 -- train_loss: 1.1758 
2025-08-11 00:46:11,967 - INFO - Epoch: 133/500, Iter: 12/119 -- train_loss: 1.1550 
2025-08-11 00:46:12,229 - INFO - Epoch: 133/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:49:46,633 - INFO - Epoch: 134/500, Iter: 1/119 -- train_loss: 1.1588 


[1/119]   1%|           [00:00<?]

2025-08-11 00:49:46,965 - INFO - Epoch: 134/500, Iter: 2/119 -- train_loss: 1.1810 
2025-08-11 00:49:47,288 - INFO - Epoch: 134/500, Iter: 3/119 -- train_loss: 1.1785 
2025-08-11 00:49:47,603 - INFO - Epoch: 134/500, Iter: 4/119 -- train_loss: 1.1706 
2025-08-11 00:49:47,883 - INFO - Epoch: 134/500, Iter: 5/119 -- train_loss: 1.1473 
2025-08-11 00:49:48,183 - INFO - Epoch: 134/500, Iter: 6/119 -- train_loss: 0.9442 
2025-08-11 00:49:48,455 - INFO - Epoch: 134/500, Iter: 7/119 -- train_loss: 1.1675 
2025-08-11 00:49:48,732 - INFO - Epoch: 134/500, Iter: 8/119 -- train_loss: 1.1542 
2025-08-11 00:49:51,092 - INFO - Epoch: 134/500, Iter: 9/119 -- train_loss: 1.1484 
2025-08-11 00:49:51,425 - INFO - Epoch: 134/500, Iter: 10/119 -- train_loss: 1.1684 
2025-08-11 00:49:51,749 - INFO - Epoch: 134/500, Iter: 11/119 -- train_loss: 1.1744 
2025-08-11 00:49:52,584 - INFO - Epoch: 134/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 00:49:52,868 - INFO - Epoch: 134/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:53:26,536 - INFO - Epoch: 135/500, Iter: 1/119 -- train_loss: 1.1405 


[1/119]   1%|           [00:00<?]

2025-08-11 00:53:30,507 - INFO - Epoch: 135/500, Iter: 2/119 -- train_loss: 1.1751 
2025-08-11 00:53:30,807 - INFO - Epoch: 135/500, Iter: 3/119 -- train_loss: 1.1748 
2025-08-11 00:53:31,127 - INFO - Epoch: 135/500, Iter: 4/119 -- train_loss: 1.1776 
2025-08-11 00:53:31,457 - INFO - Epoch: 135/500, Iter: 5/119 -- train_loss: 1.1754 
2025-08-11 00:53:32,404 - INFO - Epoch: 135/500, Iter: 6/119 -- train_loss: 1.1182 
2025-08-11 00:53:32,713 - INFO - Epoch: 135/500, Iter: 7/119 -- train_loss: 1.0985 
2025-08-11 00:53:33,004 - INFO - Epoch: 135/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 00:53:33,305 - INFO - Epoch: 135/500, Iter: 9/119 -- train_loss: 1.1756 
2025-08-11 00:53:40,373 - INFO - Epoch: 135/500, Iter: 10/119 -- train_loss: 1.1869 
2025-08-11 00:53:40,657 - INFO - Epoch: 135/500, Iter: 11/119 -- train_loss: 1.1688 
2025-08-11 00:53:40,957 - INFO - Epoch: 135/500, Iter: 12/119 -- train_loss: 1.1575 
2025-08-11 00:53:45,296 - INFO - Epoch: 135/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 00:57:14,872 - INFO - Epoch: 136/500, Iter: 1/119 -- train_loss: 1.1424 


[1/119]   1%|           [00:00<?]

2025-08-11 00:57:17,602 - INFO - Epoch: 136/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 00:57:17,939 - INFO - Epoch: 136/500, Iter: 3/119 -- train_loss: 1.0596 
2025-08-11 00:57:18,257 - INFO - Epoch: 136/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 00:57:20,414 - INFO - Epoch: 136/500, Iter: 5/119 -- train_loss: 1.1254 
2025-08-11 00:57:20,719 - INFO - Epoch: 136/500, Iter: 6/119 -- train_loss: 1.1649 
2025-08-11 00:57:21,009 - INFO - Epoch: 136/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 00:57:21,319 - INFO - Epoch: 136/500, Iter: 8/119 -- train_loss: 1.0711 
2025-08-11 00:57:21,661 - INFO - Epoch: 136/500, Iter: 9/119 -- train_loss: 1.1238 
2025-08-11 00:57:25,221 - INFO - Epoch: 136/500, Iter: 10/119 -- train_loss: 1.1755 
2025-08-11 00:57:25,535 - INFO - Epoch: 136/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 00:57:25,812 - INFO - Epoch: 136/500, Iter: 12/119 -- train_loss: 1.1706 
2025-08-11 00:57:29,619 - INFO - Epoch: 136/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:01:07,836 - INFO - Epoch: 137/500, Iter: 1/119 -- train_loss: 1.1300 


[1/119]   1%|           [00:00<?]

2025-08-11 01:01:08,241 - INFO - Epoch: 137/500, Iter: 2/119 -- train_loss: 1.1723 
2025-08-11 01:01:11,158 - INFO - Epoch: 137/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 01:01:11,454 - INFO - Epoch: 137/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-11 01:01:11,722 - INFO - Epoch: 137/500, Iter: 5/119 -- train_loss: 1.1747 
2025-08-11 01:01:12,007 - INFO - Epoch: 137/500, Iter: 6/119 -- train_loss: 1.0917 
2025-08-11 01:01:12,284 - INFO - Epoch: 137/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 01:01:12,596 - INFO - Epoch: 137/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 01:01:18,002 - INFO - Epoch: 137/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 01:01:18,316 - INFO - Epoch: 137/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 01:01:18,643 - INFO - Epoch: 137/500, Iter: 11/119 -- train_loss: 1.1750 
2025-08-11 01:01:21,693 - INFO - Epoch: 137/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 01:01:21,987 - INFO - Epoch: 137/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:05:11,951 - INFO - Epoch: 138/500, Iter: 1/119 -- train_loss: 1.1728 


[1/119]   1%|           [00:00<?]

2025-08-11 01:05:19,998 - INFO - Epoch: 138/500, Iter: 2/119 -- train_loss: 1.1498 
2025-08-11 01:05:20,330 - INFO - Epoch: 138/500, Iter: 3/119 -- train_loss: 1.1797 
2025-08-11 01:05:20,647 - INFO - Epoch: 138/500, Iter: 4/119 -- train_loss: 1.1800 
2025-08-11 01:05:20,972 - INFO - Epoch: 138/500, Iter: 5/119 -- train_loss: 1.1745 
2025-08-11 01:05:21,323 - INFO - Epoch: 138/500, Iter: 6/119 -- train_loss: 1.1747 
2025-08-11 01:05:21,656 - INFO - Epoch: 138/500, Iter: 7/119 -- train_loss: 1.1797 
2025-08-11 01:05:21,993 - INFO - Epoch: 138/500, Iter: 8/119 -- train_loss: 1.1238 
2025-08-11 01:05:22,294 - INFO - Epoch: 138/500, Iter: 9/119 -- train_loss: 1.1734 
2025-08-11 01:05:32,137 - INFO - Epoch: 138/500, Iter: 10/119 -- train_loss: 1.1750 
2025-08-11 01:05:32,408 - INFO - Epoch: 138/500, Iter: 11/119 -- train_loss: 1.1481 
2025-08-11 01:05:32,698 - INFO - Epoch: 138/500, Iter: 12/119 -- train_loss: 1.1515 
2025-08-11 01:05:32,977 - INFO - Epoch: 138/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:08:55,507 - INFO - Epoch: 139/500, Iter: 1/119 -- train_loss: 1.1434 


[1/119]   1%|           [00:00<?]

2025-08-11 01:08:55,902 - INFO - Epoch: 139/500, Iter: 2/119 -- train_loss: 1.1772 
2025-08-11 01:08:56,179 - INFO - Epoch: 139/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 01:08:59,364 - INFO - Epoch: 139/500, Iter: 4/119 -- train_loss: 1.0505 
2025-08-11 01:08:59,636 - INFO - Epoch: 139/500, Iter: 5/119 -- train_loss: 1.1475 
2025-08-11 01:08:59,908 - INFO - Epoch: 139/500, Iter: 6/119 -- train_loss: 1.1778 
2025-08-11 01:09:00,199 - INFO - Epoch: 139/500, Iter: 7/119 -- train_loss: 1.1735 
2025-08-11 01:09:01,154 - INFO - Epoch: 139/500, Iter: 8/119 -- train_loss: 1.1544 
2025-08-11 01:09:01,983 - INFO - Epoch: 139/500, Iter: 9/119 -- train_loss: 1.1387 
2025-08-11 01:09:04,034 - INFO - Epoch: 139/500, Iter: 10/119 -- train_loss: 1.1790 
2025-08-11 01:09:04,368 - INFO - Epoch: 139/500, Iter: 11/119 -- train_loss: 1.0933 
2025-08-11 01:09:09,051 - INFO - Epoch: 139/500, Iter: 12/119 -- train_loss: 1.1220 
2025-08-11 01:09:09,388 - INFO - Epoch: 139/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:12:42,830 - INFO - Epoch: 140/500, Iter: 1/119 -- train_loss: 1.1751 


[1/119]   1%|           [00:00<?]

2025-08-11 01:12:46,758 - INFO - Epoch: 140/500, Iter: 2/119 -- train_loss: 1.1750 
2025-08-11 01:12:47,034 - INFO - Epoch: 140/500, Iter: 3/119 -- train_loss: 1.1531 
2025-08-11 01:12:47,317 - INFO - Epoch: 140/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 01:12:47,595 - INFO - Epoch: 140/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 01:12:47,863 - INFO - Epoch: 140/500, Iter: 6/119 -- train_loss: 1.1745 
2025-08-11 01:12:48,385 - INFO - Epoch: 140/500, Iter: 7/119 -- train_loss: 1.1801 
2025-08-11 01:12:48,667 - INFO - Epoch: 140/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-11 01:12:48,935 - INFO - Epoch: 140/500, Iter: 9/119 -- train_loss: 1.1758 
2025-08-11 01:12:53,567 - INFO - Epoch: 140/500, Iter: 10/119 -- train_loss: 1.1569 
2025-08-11 01:12:53,854 - INFO - Epoch: 140/500, Iter: 11/119 -- train_loss: 1.1750 
2025-08-11 01:12:54,164 - INFO - Epoch: 140/500, Iter: 12/119 -- train_loss: 1.1105 
2025-08-11 01:12:57,522 - INFO - Epoch: 140/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:16:15,139 - INFO - Epoch: 141/500, Iter: 1/119 -- train_loss: 1.1393 


[1/119]   1%|           [00:00<?]

2025-08-11 01:16:15,417 - INFO - Epoch: 141/500, Iter: 2/119 -- train_loss: 1.1748 
2025-08-11 01:16:15,689 - INFO - Epoch: 141/500, Iter: 3/119 -- train_loss: 1.1725 
2025-08-11 01:16:23,702 - INFO - Epoch: 141/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 01:16:23,942 - INFO - Epoch: 141/500, Iter: 5/119 -- train_loss: 1.1378 
2025-08-11 01:16:24,164 - INFO - Epoch: 141/500, Iter: 6/119 -- train_loss: 1.0693 
2025-08-11 01:16:24,406 - INFO - Epoch: 141/500, Iter: 7/119 -- train_loss: 1.0431 
2025-08-11 01:16:24,650 - INFO - Epoch: 141/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 01:16:24,899 - INFO - Epoch: 141/500, Iter: 9/119 -- train_loss: 1.1750 
2025-08-11 01:16:25,152 - INFO - Epoch: 141/500, Iter: 10/119 -- train_loss: 1.1630 
2025-08-11 01:16:25,397 - INFO - Epoch: 141/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 01:16:32,161 - INFO - Epoch: 141/500, Iter: 12/119 -- train_loss: 1.1634 
2025-08-11 01:16:32,402 - INFO - Epoch: 141/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:19:59,831 - INFO - Epoch: 142/500, Iter: 1/119 -- train_loss: 1.1745 


[1/119]   1%|           [00:00<?]

2025-08-11 01:20:00,099 - INFO - Epoch: 142/500, Iter: 2/119 -- train_loss: 1.1580 
2025-08-11 01:20:00,359 - INFO - Epoch: 142/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 01:20:00,627 - INFO - Epoch: 142/500, Iter: 4/119 -- train_loss: 1.1398 
2025-08-11 01:20:00,908 - INFO - Epoch: 142/500, Iter: 5/119 -- train_loss: 1.1702 
2025-08-11 01:20:01,198 - INFO - Epoch: 142/500, Iter: 6/119 -- train_loss: 1.1642 
2025-08-11 01:20:01,456 - INFO - Epoch: 142/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 01:20:01,732 - INFO - Epoch: 142/500, Iter: 8/119 -- train_loss: 1.1751 
2025-08-11 01:20:07,240 - INFO - Epoch: 142/500, Iter: 9/119 -- train_loss: 1.1681 
2025-08-11 01:20:07,504 - INFO - Epoch: 142/500, Iter: 10/119 -- train_loss: 1.0753 
2025-08-11 01:20:07,757 - INFO - Epoch: 142/500, Iter: 11/119 -- train_loss: 1.1736 
2025-08-11 01:20:08,010 - INFO - Epoch: 142/500, Iter: 12/119 -- train_loss: 1.1783 
2025-08-11 01:20:08,293 - INFO - Epoch: 142/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:23:35,229 - INFO - Epoch: 143/500, Iter: 1/119 -- train_loss: 1.1455 


[1/119]   1%|           [00:00<?]

2025-08-11 01:23:39,228 - INFO - Epoch: 143/500, Iter: 2/119 -- train_loss: 1.1745 
2025-08-11 01:23:39,510 - INFO - Epoch: 143/500, Iter: 3/119 -- train_loss: 1.1673 
2025-08-11 01:23:39,769 - INFO - Epoch: 143/500, Iter: 4/119 -- train_loss: 1.1372 
2025-08-11 01:23:40,017 - INFO - Epoch: 143/500, Iter: 5/119 -- train_loss: 1.1378 
2025-08-11 01:23:40,276 - INFO - Epoch: 143/500, Iter: 6/119 -- train_loss: 1.1490 
2025-08-11 01:23:40,520 - INFO - Epoch: 143/500, Iter: 7/119 -- train_loss: 1.1481 
2025-08-11 01:23:40,789 - INFO - Epoch: 143/500, Iter: 8/119 -- train_loss: 1.1683 
2025-08-11 01:23:41,048 - INFO - Epoch: 143/500, Iter: 9/119 -- train_loss: 1.1242 
2025-08-11 01:23:48,038 - INFO - Epoch: 143/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 01:23:48,274 - INFO - Epoch: 143/500, Iter: 11/119 -- train_loss: 1.1452 
2025-08-11 01:23:48,687 - INFO - Epoch: 143/500, Iter: 12/119 -- train_loss: 1.1299 
2025-08-11 01:23:48,952 - INFO - Epoch: 143/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:27:20,317 - INFO - Epoch: 144/500, Iter: 1/119 -- train_loss: 1.1290 


[1/119]   1%|           [00:00<?]

2025-08-11 01:27:21,857 - INFO - Epoch: 144/500, Iter: 2/119 -- train_loss: 1.1191 
2025-08-11 01:27:22,129 - INFO - Epoch: 144/500, Iter: 3/119 -- train_loss: 1.1792 
2025-08-11 01:27:22,402 - INFO - Epoch: 144/500, Iter: 4/119 -- train_loss: 1.1428 
2025-08-11 01:27:22,662 - INFO - Epoch: 144/500, Iter: 5/119 -- train_loss: 1.1746 
2025-08-11 01:27:31,180 - INFO - Epoch: 144/500, Iter: 6/119 -- train_loss: 1.1791 
2025-08-11 01:27:31,411 - INFO - Epoch: 144/500, Iter: 7/119 -- train_loss: 1.1788 
2025-08-11 01:27:31,669 - INFO - Epoch: 144/500, Iter: 8/119 -- train_loss: 1.1704 
2025-08-11 01:27:31,910 - INFO - Epoch: 144/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 01:27:32,159 - INFO - Epoch: 144/500, Iter: 10/119 -- train_loss: 1.1389 
2025-08-11 01:27:32,402 - INFO - Epoch: 144/500, Iter: 11/119 -- train_loss: 1.1631 
2025-08-11 01:27:32,655 - INFO - Epoch: 144/500, Iter: 12/119 -- train_loss: 1.1755 
2025-08-11 01:27:32,912 - INFO - Epoch: 144/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:31:07,274 - INFO - Epoch: 145/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 01:31:10,603 - INFO - Epoch: 145/500, Iter: 2/119 -- train_loss: 1.1780 
2025-08-11 01:31:10,853 - INFO - Epoch: 145/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 01:31:12,243 - INFO - Epoch: 145/500, Iter: 4/119 -- train_loss: 1.1439 
2025-08-11 01:31:12,871 - INFO - Epoch: 145/500, Iter: 5/119 -- train_loss: 1.1691 
2025-08-11 01:31:13,115 - INFO - Epoch: 145/500, Iter: 6/119 -- train_loss: 1.1599 
2025-08-11 01:31:13,367 - INFO - Epoch: 145/500, Iter: 7/119 -- train_loss: 1.1746 
2025-08-11 01:31:13,620 - INFO - Epoch: 145/500, Iter: 8/119 -- train_loss: 1.1748 
2025-08-11 01:31:13,874 - INFO - Epoch: 145/500, Iter: 9/119 -- train_loss: 1.1624 
2025-08-11 01:31:16,343 - INFO - Epoch: 145/500, Iter: 10/119 -- train_loss: 1.1166 
2025-08-11 01:31:16,616 - INFO - Epoch: 145/500, Iter: 11/119 -- train_loss: 1.0720 
2025-08-11 01:31:22,855 - INFO - Epoch: 145/500, Iter: 12/119 -- train_loss: 1.1787 
2025-08-11 01:31:23,115 - INFO - Epoch: 145/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:34:46,866 - INFO - Epoch: 146/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 01:34:53,806 - INFO - Epoch: 146/500, Iter: 2/119 -- train_loss: 1.1949 
2025-08-11 01:34:54,058 - INFO - Epoch: 146/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 01:34:54,330 - INFO - Epoch: 146/500, Iter: 4/119 -- train_loss: 1.1698 
2025-08-11 01:34:54,572 - INFO - Epoch: 146/500, Iter: 5/119 -- train_loss: 1.1732 
2025-08-11 01:34:54,819 - INFO - Epoch: 146/500, Iter: 6/119 -- train_loss: 1.1502 
2025-08-11 01:34:55,070 - INFO - Epoch: 146/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 01:34:55,313 - INFO - Epoch: 146/500, Iter: 8/119 -- train_loss: 1.1748 
2025-08-11 01:35:00,790 - INFO - Epoch: 146/500, Iter: 9/119 -- train_loss: 1.1746 
2025-08-11 01:35:01,035 - INFO - Epoch: 146/500, Iter: 10/119 -- train_loss: 1.1338 
2025-08-11 01:35:01,286 - INFO - Epoch: 146/500, Iter: 11/119 -- train_loss: 1.1298 
2025-08-11 01:35:01,545 - INFO - Epoch: 146/500, Iter: 12/119 -- train_loss: 1.1693 
2025-08-11 01:35:01,800 - INFO - Epoch: 146/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:38:41,323 - INFO - Epoch: 147/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 01:38:42,557 - INFO - Epoch: 147/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 01:38:42,815 - INFO - Epoch: 147/500, Iter: 3/119 -- train_loss: 1.1680 
2025-08-11 01:38:43,064 - INFO - Epoch: 147/500, Iter: 4/119 -- train_loss: 1.1857 
2025-08-11 01:38:43,315 - INFO - Epoch: 147/500, Iter: 5/119 -- train_loss: 1.1762 
2025-08-11 01:38:43,599 - INFO - Epoch: 147/500, Iter: 6/119 -- train_loss: 1.1566 
2025-08-11 01:38:43,845 - INFO - Epoch: 147/500, Iter: 7/119 -- train_loss: 1.1496 
2025-08-11 01:38:44,100 - INFO - Epoch: 147/500, Iter: 8/119 -- train_loss: 1.0607 
2025-08-11 01:38:47,612 - INFO - Epoch: 147/500, Iter: 9/119 -- train_loss: 1.1545 
2025-08-11 01:38:47,862 - INFO - Epoch: 147/500, Iter: 10/119 -- train_loss: 1.1526 
2025-08-11 01:38:48,122 - INFO - Epoch: 147/500, Iter: 11/119 -- train_loss: 1.1718 
2025-08-11 01:38:48,378 - INFO - Epoch: 147/500, Iter: 12/119 -- train_loss: 1.1750 
2025-08-11 01:38:48,656 - INFO - Epoch: 147/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:42:13,348 - INFO - Epoch: 148/500, Iter: 1/119 -- train_loss: 1.1277 


[1/119]   1%|           [00:00<?]

2025-08-11 01:42:22,468 - INFO - Epoch: 148/500, Iter: 2/119 -- train_loss: 1.1749 
2025-08-11 01:42:22,704 - INFO - Epoch: 148/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 01:42:22,950 - INFO - Epoch: 148/500, Iter: 4/119 -- train_loss: 1.1674 
2025-08-11 01:42:23,193 - INFO - Epoch: 148/500, Iter: 5/119 -- train_loss: 1.1689 
2025-08-11 01:42:23,436 - INFO - Epoch: 148/500, Iter: 6/119 -- train_loss: 1.1403 
2025-08-11 01:42:23,678 - INFO - Epoch: 148/500, Iter: 7/119 -- train_loss: 1.1432 
2025-08-11 01:42:23,925 - INFO - Epoch: 148/500, Iter: 8/119 -- train_loss: 1.1645 
2025-08-11 01:42:26,800 - INFO - Epoch: 148/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 01:42:27,174 - INFO - Epoch: 148/500, Iter: 10/119 -- train_loss: 1.1756 
2025-08-11 01:42:27,417 - INFO - Epoch: 148/500, Iter: 11/119 -- train_loss: 1.0499 
2025-08-11 01:42:27,659 - INFO - Epoch: 148/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 01:42:27,894 - INFO - Epoch: 148/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:45:57,601 - INFO - Epoch: 149/500, Iter: 1/119 -- train_loss: 1.1756 


[1/119]   1%|           [00:00<?]

2025-08-11 01:46:01,131 - INFO - Epoch: 149/500, Iter: 2/119 -- train_loss: 1.1745 
2025-08-11 01:46:01,387 - INFO - Epoch: 149/500, Iter: 3/119 -- train_loss: 1.0979 
2025-08-11 01:46:01,618 - INFO - Epoch: 149/500, Iter: 4/119 -- train_loss: 1.1745 
2025-08-11 01:46:01,897 - INFO - Epoch: 149/500, Iter: 5/119 -- train_loss: 1.1813 
2025-08-11 01:46:05,505 - INFO - Epoch: 149/500, Iter: 6/119 -- train_loss: 1.1706 
2025-08-11 01:46:05,748 - INFO - Epoch: 149/500, Iter: 7/119 -- train_loss: 1.1267 
2025-08-11 01:46:06,018 - INFO - Epoch: 149/500, Iter: 8/119 -- train_loss: 1.1607 
2025-08-11 01:46:06,275 - INFO - Epoch: 149/500, Iter: 9/119 -- train_loss: 1.1660 
2025-08-11 01:46:06,691 - INFO - Epoch: 149/500, Iter: 10/119 -- train_loss: 1.1238 
2025-08-11 01:46:06,946 - INFO - Epoch: 149/500, Iter: 11/119 -- train_loss: 1.1616 
2025-08-11 01:46:09,180 - INFO - Epoch: 149/500, Iter: 12/119 -- train_loss: 1.0816 
2025-08-11 01:46:09,433 - INFO - Epoch: 149/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:49:44,574 - INFO - Epoch: 150/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 01:49:44,864 - INFO - Epoch: 150/500, Iter: 2/119 -- train_loss: 1.1205 
2025-08-11 01:49:45,107 - INFO - Epoch: 150/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 01:49:45,355 - INFO - Epoch: 150/500, Iter: 4/119 -- train_loss: 1.0390 
2025-08-11 01:49:45,614 - INFO - Epoch: 150/500, Iter: 5/119 -- train_loss: 1.1511 
2025-08-11 01:49:45,879 - INFO - Epoch: 150/500, Iter: 6/119 -- train_loss: 1.1676 
2025-08-11 01:49:46,113 - INFO - Epoch: 150/500, Iter: 7/119 -- train_loss: 1.1716 
2025-08-11 01:49:46,396 - INFO - Epoch: 150/500, Iter: 8/119 -- train_loss: 0.8732 
2025-08-11 01:49:51,023 - INFO - Epoch: 150/500, Iter: 9/119 -- train_loss: 1.1824 
2025-08-11 01:49:53,751 - INFO - Epoch: 150/500, Iter: 10/119 -- train_loss: 1.1751 
2025-08-11 01:49:54,090 - INFO - Epoch: 150/500, Iter: 11/119 -- train_loss: 1.1432 
2025-08-11 01:49:54,344 - INFO - Epoch: 150/500, Iter: 12/119 -- train_loss: 1.1864 
2025-08-11 01:49:54,602 - INFO - Epoch: 150/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:53:22,506 - INFO - Epoch: 151/500, Iter: 1/119 -- train_loss: 1.1748 


[1/119]   1%|           [00:00<?]

2025-08-11 01:53:23,904 - INFO - Epoch: 151/500, Iter: 2/119 -- train_loss: 1.1753 
2025-08-11 01:53:24,164 - INFO - Epoch: 151/500, Iter: 3/119 -- train_loss: 1.1716 
2025-08-11 01:53:25,338 - INFO - Epoch: 151/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-11 01:53:25,583 - INFO - Epoch: 151/500, Iter: 5/119 -- train_loss: 1.1626 
2025-08-11 01:53:25,864 - INFO - Epoch: 151/500, Iter: 6/119 -- train_loss: 1.1771 
2025-08-11 01:53:26,099 - INFO - Epoch: 151/500, Iter: 7/119 -- train_loss: 1.1668 
2025-08-11 01:53:26,344 - INFO - Epoch: 151/500, Iter: 8/119 -- train_loss: 1.0709 
2025-08-11 01:53:32,875 - INFO - Epoch: 151/500, Iter: 9/119 -- train_loss: 1.1759 
2025-08-11 01:53:33,114 - INFO - Epoch: 151/500, Iter: 10/119 -- train_loss: 1.1751 
2025-08-11 01:53:33,398 - INFO - Epoch: 151/500, Iter: 11/119 -- train_loss: 1.1744 
2025-08-11 01:53:33,623 - INFO - Epoch: 151/500, Iter: 12/119 -- train_loss: 1.1643 
2025-08-11 01:53:33,901 - INFO - Epoch: 151/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 01:57:00,820 - INFO - Epoch: 152/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 01:57:01,076 - INFO - Epoch: 152/500, Iter: 2/119 -- train_loss: 1.1490 
2025-08-11 01:57:01,359 - INFO - Epoch: 152/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 01:57:01,632 - INFO - Epoch: 152/500, Iter: 4/119 -- train_loss: 1.1487 
2025-08-11 01:57:01,925 - INFO - Epoch: 152/500, Iter: 5/119 -- train_loss: 1.1643 
2025-08-11 01:57:02,200 - INFO - Epoch: 152/500, Iter: 6/119 -- train_loss: 1.1748 
2025-08-11 01:57:02,475 - INFO - Epoch: 152/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 01:57:02,775 - INFO - Epoch: 152/500, Iter: 8/119 -- train_loss: 1.1746 
2025-08-11 01:57:05,965 - INFO - Epoch: 152/500, Iter: 9/119 -- train_loss: 1.1788 
2025-08-11 01:57:06,246 - INFO - Epoch: 152/500, Iter: 10/119 -- train_loss: 1.1380 
2025-08-11 01:57:06,529 - INFO - Epoch: 152/500, Iter: 11/119 -- train_loss: 1.1745 
2025-08-11 01:57:10,768 - INFO - Epoch: 152/500, Iter: 12/119 -- train_loss: 1.1705 
2025-08-11 01:57:11,068 - INFO - Epoch: 152/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:00:39,183 - INFO - Epoch: 153/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 02:00:39,491 - INFO - Epoch: 153/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 02:00:39,805 - INFO - Epoch: 153/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 02:00:40,142 - INFO - Epoch: 153/500, Iter: 4/119 -- train_loss: 1.1530 
2025-08-11 02:00:40,440 - INFO - Epoch: 153/500, Iter: 5/119 -- train_loss: 1.0521 
2025-08-11 02:00:40,748 - INFO - Epoch: 153/500, Iter: 6/119 -- train_loss: 1.1693 
2025-08-11 02:00:41,050 - INFO - Epoch: 153/500, Iter: 7/119 -- train_loss: 1.1745 
2025-08-11 02:00:41,410 - INFO - Epoch: 153/500, Iter: 8/119 -- train_loss: 1.1897 
2025-08-11 02:00:45,745 - INFO - Epoch: 153/500, Iter: 9/119 -- train_loss: 1.0328 
2025-08-11 02:00:46,076 - INFO - Epoch: 153/500, Iter: 10/119 -- train_loss: 1.1347 
2025-08-11 02:00:46,366 - INFO - Epoch: 153/500, Iter: 11/119 -- train_loss: 1.1745 
2025-08-11 02:00:46,683 - INFO - Epoch: 153/500, Iter: 12/119 -- train_loss: 1.1660 
2025-08-11 02:00:46,993 - INFO - Epoch: 153/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:04:23,907 - INFO - Epoch: 154/500, Iter: 1/119 -- train_loss: 1.1727 


[1/119]   1%|           [00:00<?]

2025-08-11 02:04:24,260 - INFO - Epoch: 154/500, Iter: 2/119 -- train_loss: 1.1148 
2025-08-11 02:04:24,558 - INFO - Epoch: 154/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 02:04:24,858 - INFO - Epoch: 154/500, Iter: 4/119 -- train_loss: 1.1530 
2025-08-11 02:04:25,301 - INFO - Epoch: 154/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 02:04:25,601 - INFO - Epoch: 154/500, Iter: 6/119 -- train_loss: 1.1210 
2025-08-11 02:04:25,940 - INFO - Epoch: 154/500, Iter: 7/119 -- train_loss: 1.1682 
2025-08-11 02:04:26,234 - INFO - Epoch: 154/500, Iter: 8/119 -- train_loss: 1.1466 
2025-08-11 02:04:30,503 - INFO - Epoch: 154/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 02:04:34,467 - INFO - Epoch: 154/500, Iter: 10/119 -- train_loss: 1.1587 
2025-08-11 02:04:34,730 - INFO - Epoch: 154/500, Iter: 11/119 -- train_loss: 1.1421 
2025-08-11 02:04:35,034 - INFO - Epoch: 154/500, Iter: 12/119 -- train_loss: 1.1727 
2025-08-11 02:04:35,324 - INFO - Epoch: 154/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:08:10,842 - INFO - Epoch: 155/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 02:08:11,263 - INFO - Epoch: 155/500, Iter: 2/119 -- train_loss: 1.1879 
2025-08-11 02:08:11,595 - INFO - Epoch: 155/500, Iter: 3/119 -- train_loss: 1.1719 
2025-08-11 02:08:11,911 - INFO - Epoch: 155/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 02:08:12,202 - INFO - Epoch: 155/500, Iter: 5/119 -- train_loss: 1.1463 
2025-08-11 02:08:12,553 - INFO - Epoch: 155/500, Iter: 6/119 -- train_loss: 1.1560 
2025-08-11 02:08:12,850 - INFO - Epoch: 155/500, Iter: 7/119 -- train_loss: 1.1684 
2025-08-11 02:08:13,161 - INFO - Epoch: 155/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 02:08:19,235 - INFO - Epoch: 155/500, Iter: 9/119 -- train_loss: 1.1533 
2025-08-11 02:08:19,531 - INFO - Epoch: 155/500, Iter: 10/119 -- train_loss: 1.1602 
2025-08-11 02:08:19,798 - INFO - Epoch: 155/500, Iter: 11/119 -- train_loss: 1.1400 
2025-08-11 02:08:20,077 - INFO - Epoch: 155/500, Iter: 12/119 -- train_loss: 1.1581 
2025-08-11 02:08:20,350 - INFO - Epoch: 155/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:12:03,546 - INFO - Epoch: 156/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 02:12:06,099 - INFO - Epoch: 156/500, Iter: 2/119 -- train_loss: 1.1744 
2025-08-11 02:12:06,372 - INFO - Epoch: 156/500, Iter: 3/119 -- train_loss: 1.1761 
2025-08-11 02:12:06,651 - INFO - Epoch: 156/500, Iter: 4/119 -- train_loss: 1.1715 
2025-08-11 02:12:06,982 - INFO - Epoch: 156/500, Iter: 5/119 -- train_loss: 1.1613 
2025-08-11 02:12:07,280 - INFO - Epoch: 156/500, Iter: 6/119 -- train_loss: 1.1037 
2025-08-11 02:12:09,529 - INFO - Epoch: 156/500, Iter: 7/119 -- train_loss: 1.1160 
2025-08-11 02:12:09,848 - INFO - Epoch: 156/500, Iter: 8/119 -- train_loss: 1.1751 
2025-08-11 02:12:10,156 - INFO - Epoch: 156/500, Iter: 9/119 -- train_loss: 1.1681 
2025-08-11 02:12:11,938 - INFO - Epoch: 156/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 02:12:16,322 - INFO - Epoch: 156/500, Iter: 11/119 -- train_loss: 1.1753 
2025-08-11 02:12:16,642 - INFO - Epoch: 156/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 02:12:19,348 - INFO - Epoch: 156/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:15:59,686 - INFO - Epoch: 157/500, Iter: 1/119 -- train_loss: 1.1091 


[1/119]   1%|           [00:00<?]

2025-08-11 02:16:08,447 - INFO - Epoch: 157/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 02:16:08,716 - INFO - Epoch: 157/500, Iter: 3/119 -- train_loss: 1.1682 
2025-08-11 02:16:09,002 - INFO - Epoch: 157/500, Iter: 4/119 -- train_loss: 1.1600 
2025-08-11 02:16:09,300 - INFO - Epoch: 157/500, Iter: 5/119 -- train_loss: 1.1721 
2025-08-11 02:16:09,613 - INFO - Epoch: 157/500, Iter: 6/119 -- train_loss: 1.1743 
2025-08-11 02:16:09,906 - INFO - Epoch: 157/500, Iter: 7/119 -- train_loss: 1.1621 
2025-08-11 02:16:10,213 - INFO - Epoch: 157/500, Iter: 8/119 -- train_loss: 1.1385 
2025-08-11 02:16:10,504 - INFO - Epoch: 157/500, Iter: 9/119 -- train_loss: 1.1441 
2025-08-11 02:16:20,294 - INFO - Epoch: 157/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 02:16:20,561 - INFO - Epoch: 157/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 02:16:20,836 - INFO - Epoch: 157/500, Iter: 12/119 -- train_loss: 1.1711 
2025-08-11 02:16:21,143 - INFO - Epoch: 157/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:19:53,419 - INFO - Epoch: 158/500, Iter: 1/119 -- train_loss: 1.1677 


[1/119]   1%|           [00:00<?]

2025-08-11 02:19:53,709 - INFO - Epoch: 158/500, Iter: 2/119 -- train_loss: 1.0222 
2025-08-11 02:19:58,970 - INFO - Epoch: 158/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 02:19:59,253 - INFO - Epoch: 158/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 02:19:59,581 - INFO - Epoch: 158/500, Iter: 5/119 -- train_loss: 1.1307 
2025-08-11 02:19:59,865 - INFO - Epoch: 158/500, Iter: 6/119 -- train_loss: 1.1566 
2025-08-11 02:20:00,149 - INFO - Epoch: 158/500, Iter: 7/119 -- train_loss: 1.1384 
2025-08-11 02:20:04,811 - INFO - Epoch: 158/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 02:20:05,110 - INFO - Epoch: 158/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 02:20:05,402 - INFO - Epoch: 158/500, Iter: 10/119 -- train_loss: 1.1454 
2025-08-11 02:20:05,724 - INFO - Epoch: 158/500, Iter: 11/119 -- train_loss: 1.1694 
2025-08-11 02:20:06,034 - INFO - Epoch: 158/500, Iter: 12/119 -- train_loss: 1.1744 
2025-08-11 02:20:06,327 - INFO - Epoch: 158/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:23:41,561 - INFO - Epoch: 159/500, Iter: 1/119 -- train_loss: 1.1504 


[1/119]   1%|           [00:00<?]

2025-08-11 02:23:51,161 - INFO - Epoch: 159/500, Iter: 2/119 -- train_loss: 1.1748 
2025-08-11 02:23:51,444 - INFO - Epoch: 159/500, Iter: 3/119 -- train_loss: 1.0086 
2025-08-11 02:23:51,720 - INFO - Epoch: 159/500, Iter: 4/119 -- train_loss: 1.1679 
2025-08-11 02:23:52,011 - INFO - Epoch: 159/500, Iter: 5/119 -- train_loss: 1.1765 
2025-08-11 02:23:52,337 - INFO - Epoch: 159/500, Iter: 6/119 -- train_loss: 1.1813 
2025-08-11 02:23:52,637 - INFO - Epoch: 159/500, Iter: 7/119 -- train_loss: 1.1850 
2025-08-11 02:23:52,970 - INFO - Epoch: 159/500, Iter: 8/119 -- train_loss: 1.1796 
2025-08-11 02:23:53,320 - INFO - Epoch: 159/500, Iter: 9/119 -- train_loss: 1.1750 
2025-08-11 02:24:00,426 - INFO - Epoch: 159/500, Iter: 10/119 -- train_loss: 1.1444 
2025-08-11 02:24:00,710 - INFO - Epoch: 159/500, Iter: 11/119 -- train_loss: 1.1724 
2025-08-11 02:24:01,053 - INFO - Epoch: 159/500, Iter: 12/119 -- train_loss: 1.1712 
2025-08-11 02:24:01,360 - INFO - Epoch: 159/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:27:36,290 - INFO - Epoch: 160/500, Iter: 1/119 -- train_loss: 1.1729 


[1/119]   1%|           [00:00<?]

2025-08-11 02:27:36,569 - INFO - Epoch: 160/500, Iter: 2/119 -- train_loss: 1.1418 
2025-08-11 02:27:40,228 - INFO - Epoch: 160/500, Iter: 3/119 -- train_loss: 1.1746 
2025-08-11 02:27:40,528 - INFO - Epoch: 160/500, Iter: 4/119 -- train_loss: 1.1746 
2025-08-11 02:27:40,802 - INFO - Epoch: 160/500, Iter: 5/119 -- train_loss: 1.1711 
2025-08-11 02:27:41,110 - INFO - Epoch: 160/500, Iter: 6/119 -- train_loss: 1.1602 
2025-08-11 02:27:41,422 - INFO - Epoch: 160/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 02:27:41,718 - INFO - Epoch: 160/500, Iter: 8/119 -- train_loss: 1.1728 
2025-08-11 02:27:46,052 - INFO - Epoch: 160/500, Iter: 9/119 -- train_loss: 1.1745 
2025-08-11 02:27:46,368 - INFO - Epoch: 160/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 02:27:46,840 - INFO - Epoch: 160/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 02:27:47,150 - INFO - Epoch: 160/500, Iter: 12/119 -- train_loss: 1.1721 
2025-08-11 02:27:47,432 - INFO - Epoch: 160/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:31:40,194 - INFO - Epoch: 161/500, Iter: 1/119 -- train_loss: 1.1013 


[1/119]   1%|           [00:00<?]

2025-08-11 02:31:40,475 - INFO - Epoch: 161/500, Iter: 2/119 -- train_loss: 1.1675 
2025-08-11 02:31:40,751 - INFO - Epoch: 161/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 02:31:41,045 - INFO - Epoch: 161/500, Iter: 4/119 -- train_loss: 1.1669 
2025-08-11 02:31:41,308 - INFO - Epoch: 161/500, Iter: 5/119 -- train_loss: 1.1560 
2025-08-11 02:31:41,574 - INFO - Epoch: 161/500, Iter: 6/119 -- train_loss: 1.1528 
2025-08-11 02:31:41,871 - INFO - Epoch: 161/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 02:31:42,141 - INFO - Epoch: 161/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 02:31:51,607 - INFO - Epoch: 161/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 02:31:51,857 - INFO - Epoch: 161/500, Iter: 10/119 -- train_loss: 1.1607 
2025-08-11 02:31:52,145 - INFO - Epoch: 161/500, Iter: 11/119 -- train_loss: 1.1714 
2025-08-11 02:31:52,450 - INFO - Epoch: 161/500, Iter: 12/119 -- train_loss: 1.1720 
2025-08-11 02:31:52,724 - INFO - Epoch: 161/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:35:37,936 - INFO - Epoch: 162/500, Iter: 1/119 -- train_loss: 1.1581 


[1/119]   1%|           [00:00<?]

2025-08-11 02:35:38,235 - INFO - Epoch: 162/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 02:35:38,555 - INFO - Epoch: 162/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 02:35:38,855 - INFO - Epoch: 162/500, Iter: 4/119 -- train_loss: 1.1509 
2025-08-11 02:35:39,165 - INFO - Epoch: 162/500, Iter: 5/119 -- train_loss: 1.1888 
2025-08-11 02:35:39,488 - INFO - Epoch: 162/500, Iter: 6/119 -- train_loss: 1.1510 
2025-08-11 02:35:39,830 - INFO - Epoch: 162/500, Iter: 7/119 -- train_loss: 1.1791 
2025-08-11 02:35:40,115 - INFO - Epoch: 162/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-11 02:35:42,864 - INFO - Epoch: 162/500, Iter: 9/119 -- train_loss: 1.1744 
2025-08-11 02:35:43,255 - INFO - Epoch: 162/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 02:35:43,567 - INFO - Epoch: 162/500, Iter: 11/119 -- train_loss: 1.1669 
2025-08-11 02:35:43,888 - INFO - Epoch: 162/500, Iter: 12/119 -- train_loss: 1.1642 
2025-08-11 02:35:45,693 - INFO - Epoch: 162/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:39:25,267 - INFO - Epoch: 163/500, Iter: 1/119 -- train_loss: 1.1282 


[1/119]   1%|           [00:00<?]

2025-08-11 02:39:25,576 - INFO - Epoch: 163/500, Iter: 2/119 -- train_loss: 1.1697 
2025-08-11 02:39:25,863 - INFO - Epoch: 163/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 02:39:26,156 - INFO - Epoch: 163/500, Iter: 4/119 -- train_loss: 1.1754 
2025-08-11 02:39:26,462 - INFO - Epoch: 163/500, Iter: 5/119 -- train_loss: 1.1747 
2025-08-11 02:39:26,807 - INFO - Epoch: 163/500, Iter: 6/119 -- train_loss: 1.1745 
2025-08-11 02:39:27,101 - INFO - Epoch: 163/500, Iter: 7/119 -- train_loss: 1.1540 
2025-08-11 02:39:27,440 - INFO - Epoch: 163/500, Iter: 8/119 -- train_loss: 1.1633 
2025-08-11 02:39:39,439 - INFO - Epoch: 163/500, Iter: 9/119 -- train_loss: 1.1717 
2025-08-11 02:39:39,738 - INFO - Epoch: 163/500, Iter: 10/119 -- train_loss: 1.1611 
2025-08-11 02:39:39,987 - INFO - Epoch: 163/500, Iter: 11/119 -- train_loss: 1.1745 
2025-08-11 02:39:40,269 - INFO - Epoch: 163/500, Iter: 12/119 -- train_loss: 1.1422 
2025-08-11 02:39:40,552 - INFO - Epoch: 163/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:43:17,385 - INFO - Epoch: 164/500, Iter: 1/119 -- train_loss: 1.1736 


[1/119]   1%|           [00:00<?]

2025-08-11 02:43:20,299 - INFO - Epoch: 164/500, Iter: 2/119 -- train_loss: 1.1415 
2025-08-11 02:43:20,587 - INFO - Epoch: 164/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 02:43:20,884 - INFO - Epoch: 164/500, Iter: 4/119 -- train_loss: 1.1755 
2025-08-11 02:43:21,188 - INFO - Epoch: 164/500, Iter: 5/119 -- train_loss: 1.1369 
2025-08-11 02:43:21,863 - INFO - Epoch: 164/500, Iter: 6/119 -- train_loss: 1.1745 
2025-08-11 02:43:22,129 - INFO - Epoch: 164/500, Iter: 7/119 -- train_loss: 1.1744 
2025-08-11 02:43:22,436 - INFO - Epoch: 164/500, Iter: 8/119 -- train_loss: 1.0655 
2025-08-11 02:43:23,311 - INFO - Epoch: 164/500, Iter: 9/119 -- train_loss: 1.1744 
2025-08-11 02:43:28,131 - INFO - Epoch: 164/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 02:43:28,423 - INFO - Epoch: 164/500, Iter: 11/119 -- train_loss: 1.1750 
2025-08-11 02:43:28,720 - INFO - Epoch: 164/500, Iter: 12/119 -- train_loss: 1.1763 
2025-08-11 02:43:29,056 - INFO - Epoch: 164/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:47:05,683 - INFO - Epoch: 165/500, Iter: 1/119 -- train_loss: 1.1193 


[1/119]   1%|           [00:00<?]

2025-08-11 02:47:06,026 - INFO - Epoch: 165/500, Iter: 2/119 -- train_loss: 1.1113 
2025-08-11 02:47:06,333 - INFO - Epoch: 165/500, Iter: 3/119 -- train_loss: 1.1729 
2025-08-11 02:47:06,650 - INFO - Epoch: 165/500, Iter: 4/119 -- train_loss: 1.1792 
2025-08-11 02:47:06,949 - INFO - Epoch: 165/500, Iter: 5/119 -- train_loss: 1.1576 
2025-08-11 02:47:07,260 - INFO - Epoch: 165/500, Iter: 6/119 -- train_loss: 1.0733 
2025-08-11 02:47:07,576 - INFO - Epoch: 165/500, Iter: 7/119 -- train_loss: 1.1386 
2025-08-11 02:47:07,850 - INFO - Epoch: 165/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 02:47:15,482 - INFO - Epoch: 165/500, Iter: 9/119 -- train_loss: 1.1385 
2025-08-11 02:47:15,783 - INFO - Epoch: 165/500, Iter: 10/119 -- train_loss: 1.1670 
2025-08-11 02:47:16,083 - INFO - Epoch: 165/500, Iter: 11/119 -- train_loss: 1.1696 
2025-08-11 02:47:16,366 - INFO - Epoch: 165/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 02:47:16,652 - INFO - Epoch: 165/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:50:58,666 - INFO - Epoch: 166/500, Iter: 1/119 -- train_loss: 1.1769 


[1/119]   1%|           [00:00<?]

2025-08-11 02:50:58,968 - INFO - Epoch: 166/500, Iter: 2/119 -- train_loss: 1.1059 
2025-08-11 02:50:59,249 - INFO - Epoch: 166/500, Iter: 3/119 -- train_loss: 1.1513 
2025-08-11 02:50:59,527 - INFO - Epoch: 166/500, Iter: 4/119 -- train_loss: 1.1745 
2025-08-11 02:50:59,831 - INFO - Epoch: 166/500, Iter: 5/119 -- train_loss: 1.1842 
2025-08-11 02:51:00,138 - INFO - Epoch: 166/500, Iter: 6/119 -- train_loss: 1.1428 
2025-08-11 02:51:00,446 - INFO - Epoch: 166/500, Iter: 7/119 -- train_loss: 1.1671 
2025-08-11 02:51:00,775 - INFO - Epoch: 166/500, Iter: 8/119 -- train_loss: 1.0982 
2025-08-11 02:51:03,126 - INFO - Epoch: 166/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 02:51:03,459 - INFO - Epoch: 166/500, Iter: 10/119 -- train_loss: 1.0339 
2025-08-11 02:51:10,203 - INFO - Epoch: 166/500, Iter: 11/119 -- train_loss: 1.1400 
2025-08-11 02:51:10,482 - INFO - Epoch: 166/500, Iter: 12/119 -- train_loss: 1.1708 
2025-08-11 02:51:10,782 - INFO - Epoch: 166/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:54:44,960 - INFO - Epoch: 167/500, Iter: 1/119 -- train_loss: 1.1717 


[1/119]   1%|           [00:00<?]

2025-08-11 02:54:48,556 - INFO - Epoch: 167/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 02:54:48,866 - INFO - Epoch: 167/500, Iter: 3/119 -- train_loss: 1.1535 
2025-08-11 02:54:49,120 - INFO - Epoch: 167/500, Iter: 4/119 -- train_loss: 1.1552 
2025-08-11 02:54:49,410 - INFO - Epoch: 167/500, Iter: 5/119 -- train_loss: 1.1615 
2025-08-11 02:54:49,725 - INFO - Epoch: 167/500, Iter: 6/119 -- train_loss: 1.1306 
2025-08-11 02:54:50,041 - INFO - Epoch: 167/500, Iter: 7/119 -- train_loss: 1.1705 
2025-08-11 02:54:50,351 - INFO - Epoch: 167/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-11 02:54:50,673 - INFO - Epoch: 167/500, Iter: 9/119 -- train_loss: 1.0815 
2025-08-11 02:54:53,460 - INFO - Epoch: 167/500, Iter: 10/119 -- train_loss: 1.1740 
2025-08-11 02:54:53,754 - INFO - Epoch: 167/500, Iter: 11/119 -- train_loss: 1.1792 
2025-08-11 02:54:54,061 - INFO - Epoch: 167/500, Iter: 12/119 -- train_loss: 1.1566 
2025-08-11 02:54:57,030 - INFO - Epoch: 167/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 02:58:25,652 - INFO - Epoch: 168/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 02:58:32,067 - INFO - Epoch: 168/500, Iter: 2/119 -- train_loss: 1.1728 
2025-08-11 02:58:32,349 - INFO - Epoch: 168/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 02:58:32,652 - INFO - Epoch: 168/500, Iter: 4/119 -- train_loss: 1.1646 
2025-08-11 02:58:32,937 - INFO - Epoch: 168/500, Iter: 5/119 -- train_loss: 1.1715 
2025-08-11 02:58:33,228 - INFO - Epoch: 168/500, Iter: 6/119 -- train_loss: 1.1319 
2025-08-11 02:58:33,528 - INFO - Epoch: 168/500, Iter: 7/119 -- train_loss: 1.1552 
2025-08-11 02:58:33,846 - INFO - Epoch: 168/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 02:58:34,194 - INFO - Epoch: 168/500, Iter: 9/119 -- train_loss: 1.1543 
2025-08-11 02:58:36,437 - INFO - Epoch: 168/500, Iter: 10/119 -- train_loss: 1.1313 
2025-08-11 02:58:36,786 - INFO - Epoch: 168/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 02:58:37,094 - INFO - Epoch: 168/500, Iter: 12/119 -- train_loss: 1.1713 
2025-08-11 02:58:37,392 - INFO - Epoch: 168/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 03:05:59,295 - INFO - Epoch: 169/500, Iter: 1/119 -- train_loss: 1.1825 


[1/119]   1%|           [00:00<?]

2025-08-11 03:06:01,765 - INFO - Epoch: 169/500, Iter: 2/119 -- train_loss: 1.1707 
2025-08-11 03:06:06,655 - INFO - Epoch: 169/500, Iter: 3/119 -- train_loss: 1.1717 
2025-08-11 03:06:07,125 - INFO - Epoch: 169/500, Iter: 4/119 -- train_loss: 1.1758 
2025-08-11 03:06:10,354 - INFO - Epoch: 169/500, Iter: 5/119 -- train_loss: 1.1626 
2025-08-11 03:06:10,889 - INFO - Epoch: 169/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 03:06:11,380 - INFO - Epoch: 169/500, Iter: 7/119 -- train_loss: 1.1559 
2025-08-11 03:06:11,841 - INFO - Epoch: 169/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-11 03:06:12,310 - INFO - Epoch: 169/500, Iter: 9/119 -- train_loss: 1.1808 
2025-08-11 03:06:15,602 - INFO - Epoch: 169/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 03:06:22,711 - INFO - Epoch: 169/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 03:06:24,728 - INFO - Epoch: 169/500, Iter: 12/119 -- train_loss: 1.1197 
2025-08-11 03:06:33,397 - INFO - Epoch: 169/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 03:14:51,996 - INFO - Epoch: 170/500, Iter: 1/119 -- train_loss: 1.1210 


[1/119]   1%|           [00:00<?]

2025-08-11 03:14:52,466 - INFO - Epoch: 170/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 03:14:53,009 - INFO - Epoch: 170/500, Iter: 3/119 -- train_loss: 1.1716 
2025-08-11 03:14:58,856 - INFO - Epoch: 170/500, Iter: 4/119 -- train_loss: 1.1777 
2025-08-11 03:14:59,364 - INFO - Epoch: 170/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 03:14:59,848 - INFO - Epoch: 170/500, Iter: 6/119 -- train_loss: 1.0520 
2025-08-11 03:15:00,573 - INFO - Epoch: 170/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 03:15:01,076 - INFO - Epoch: 170/500, Iter: 8/119 -- train_loss: 1.1746 
2025-08-11 03:15:01,571 - INFO - Epoch: 170/500, Iter: 9/119 -- train_loss: 1.1029 
2025-08-11 03:15:03,200 - INFO - Epoch: 170/500, Iter: 10/119 -- train_loss: 1.1788 
2025-08-11 03:15:04,413 - INFO - Epoch: 170/500, Iter: 11/119 -- train_loss: 1.1271 
2025-08-11 03:15:07,193 - INFO - Epoch: 170/500, Iter: 12/119 -- train_loss: 1.1754 
2025-08-11 03:15:17,238 - INFO - Epoch: 170/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 03:23:28,663 - INFO - Epoch: 171/500, Iter: 1/119 -- train_loss: 1.1494 


[1/119]   1%|           [00:00<?]

2025-08-11 03:23:30,720 - INFO - Epoch: 171/500, Iter: 2/119 -- train_loss: 1.1495 
2025-08-11 03:23:33,991 - INFO - Epoch: 171/500, Iter: 3/119 -- train_loss: 1.1697 
2025-08-11 03:23:34,474 - INFO - Epoch: 171/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 03:23:34,933 - INFO - Epoch: 171/500, Iter: 5/119 -- train_loss: 1.0702 
2025-08-11 03:23:35,424 - INFO - Epoch: 171/500, Iter: 6/119 -- train_loss: 1.1770 
2025-08-11 03:23:40,899 - INFO - Epoch: 171/500, Iter: 7/119 -- train_loss: 1.1014 
2025-08-11 03:23:41,447 - INFO - Epoch: 171/500, Iter: 8/119 -- train_loss: 1.1541 
2025-08-11 03:23:42,563 - INFO - Epoch: 171/500, Iter: 9/119 -- train_loss: 1.1575 
2025-08-11 03:23:43,108 - INFO - Epoch: 171/500, Iter: 10/119 -- train_loss: 1.1558 
2025-08-11 03:23:43,591 - INFO - Epoch: 171/500, Iter: 11/119 -- train_loss: 1.1754 
2025-08-11 03:23:46,607 - INFO - Epoch: 171/500, Iter: 12/119 -- train_loss: 1.1822 
2025-08-11 03:23:47,090 - INFO - Epoch: 171/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 03:32:27,940 - INFO - Epoch: 172/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 03:32:30,614 - INFO - Epoch: 172/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 03:32:39,513 - INFO - Epoch: 172/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 03:32:40,052 - INFO - Epoch: 172/500, Iter: 4/119 -- train_loss: 1.1435 
2025-08-11 03:32:40,569 - INFO - Epoch: 172/500, Iter: 5/119 -- train_loss: 1.1622 
2025-08-11 03:32:41,089 - INFO - Epoch: 172/500, Iter: 6/119 -- train_loss: 1.1762 
2025-08-11 03:32:41,561 - INFO - Epoch: 172/500, Iter: 7/119 -- train_loss: 1.0501 
2025-08-11 03:32:42,042 - INFO - Epoch: 172/500, Iter: 8/119 -- train_loss: 1.1684 
2025-08-11 03:32:44,122 - INFO - Epoch: 172/500, Iter: 9/119 -- train_loss: 1.1745 
2025-08-11 03:32:44,579 - INFO - Epoch: 172/500, Iter: 10/119 -- train_loss: 0.9917 
2025-08-11 03:32:49,146 - INFO - Epoch: 172/500, Iter: 11/119 -- train_loss: 1.1745 
2025-08-11 03:32:49,634 - INFO - Epoch: 172/500, Iter: 12/119 -- train_loss: 1.1042 
2025-08-11 03:32:50,169 - INFO - Epoch: 172/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 03:41:10,771 - INFO - Epoch: 173/500, Iter: 1/119 -- train_loss: 1.1896 


[1/119]   1%|           [00:00<?]

2025-08-11 03:41:11,288 - INFO - Epoch: 173/500, Iter: 2/119 -- train_loss: 1.1561 
2025-08-11 03:41:11,722 - INFO - Epoch: 173/500, Iter: 3/119 -- train_loss: 1.0806 
2025-08-11 03:41:12,296 - INFO - Epoch: 173/500, Iter: 4/119 -- train_loss: 0.8847 
2025-08-11 03:41:13,389 - INFO - Epoch: 173/500, Iter: 5/119 -- train_loss: 1.1581 
2025-08-11 03:41:13,870 - INFO - Epoch: 173/500, Iter: 6/119 -- train_loss: 1.1929 
2025-08-11 03:41:18,754 - INFO - Epoch: 173/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 03:41:19,227 - INFO - Epoch: 173/500, Iter: 8/119 -- train_loss: 1.1212 
2025-08-11 03:41:19,732 - INFO - Epoch: 173/500, Iter: 9/119 -- train_loss: 1.1681 
2025-08-11 03:41:20,183 - INFO - Epoch: 173/500, Iter: 10/119 -- train_loss: 1.1720 
2025-08-11 03:41:20,717 - INFO - Epoch: 173/500, Iter: 11/119 -- train_loss: 1.1507 
2025-08-11 03:41:21,158 - INFO - Epoch: 173/500, Iter: 12/119 -- train_loss: 1.1259 
2025-08-11 03:41:30,376 - INFO - Epoch: 173/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 03:49:51,816 - INFO - Epoch: 174/500, Iter: 1/119 -- train_loss: 1.1578 


[1/119]   1%|           [00:00<?]

2025-08-11 03:49:52,323 - INFO - Epoch: 174/500, Iter: 2/119 -- train_loss: 1.1371 
2025-08-11 03:50:02,609 - INFO - Epoch: 174/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 03:50:03,129 - INFO - Epoch: 174/500, Iter: 4/119 -- train_loss: 1.1750 
2025-08-11 03:50:03,610 - INFO - Epoch: 174/500, Iter: 5/119 -- train_loss: 1.1686 
2025-08-11 03:50:04,107 - INFO - Epoch: 174/500, Iter: 6/119 -- train_loss: 1.1753 
2025-08-11 03:50:04,592 - INFO - Epoch: 174/500, Iter: 7/119 -- train_loss: 1.1253 
2025-08-11 03:50:05,099 - INFO - Epoch: 174/500, Iter: 8/119 -- train_loss: 1.1751 
2025-08-11 03:50:05,605 - INFO - Epoch: 174/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 03:50:06,057 - INFO - Epoch: 174/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 03:50:19,572 - INFO - Epoch: 174/500, Iter: 11/119 -- train_loss: 1.1731 
2025-08-11 03:50:20,078 - INFO - Epoch: 174/500, Iter: 12/119 -- train_loss: 1.1617 
2025-08-11 03:50:20,536 - INFO - Epoch: 174/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 03:58:34,606 - INFO - Epoch: 175/500, Iter: 1/119 -- train_loss: 1.1651 


[1/119]   1%|           [00:00<?]

2025-08-11 03:58:35,104 - INFO - Epoch: 175/500, Iter: 2/119 -- train_loss: 1.1833 
2025-08-11 03:58:38,481 - INFO - Epoch: 175/500, Iter: 3/119 -- train_loss: 1.1449 
2025-08-11 03:58:38,981 - INFO - Epoch: 175/500, Iter: 4/119 -- train_loss: 1.1746 
2025-08-11 03:58:39,493 - INFO - Epoch: 175/500, Iter: 5/119 -- train_loss: 1.0822 
2025-08-11 03:58:39,968 - INFO - Epoch: 175/500, Iter: 6/119 -- train_loss: 1.1723 
2025-08-11 03:58:40,447 - INFO - Epoch: 175/500, Iter: 7/119 -- train_loss: 1.1532 
2025-08-11 03:58:40,942 - INFO - Epoch: 175/500, Iter: 8/119 -- train_loss: 1.1379 
2025-08-11 03:58:44,573 - INFO - Epoch: 175/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 03:58:58,830 - INFO - Epoch: 175/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 03:58:59,313 - INFO - Epoch: 175/500, Iter: 11/119 -- train_loss: 1.1586 
2025-08-11 03:58:59,779 - INFO - Epoch: 175/500, Iter: 12/119 -- train_loss: 1.1496 
2025-08-11 03:59:00,220 - INFO - Epoch: 175/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 04:06:50,841 - INFO - Epoch: 176/500, Iter: 1/119 -- train_loss: 1.1145 


[1/119]   1%|           [00:00<?]

2025-08-11 04:07:18,190 - INFO - Epoch: 176/500, Iter: 2/119 -- train_loss: 1.1712 
2025-08-11 04:07:18,684 - INFO - Epoch: 176/500, Iter: 3/119 -- train_loss: 1.1748 
2025-08-11 04:07:19,143 - INFO - Epoch: 176/500, Iter: 4/119 -- train_loss: 1.0620 
2025-08-11 04:07:19,588 - INFO - Epoch: 176/500, Iter: 5/119 -- train_loss: 1.1748 
2025-08-11 04:07:20,082 - INFO - Epoch: 176/500, Iter: 6/119 -- train_loss: 1.1539 
2025-08-11 04:07:20,588 - INFO - Epoch: 176/500, Iter: 7/119 -- train_loss: 1.1776 
2025-08-11 04:07:21,578 - INFO - Epoch: 176/500, Iter: 8/119 -- train_loss: 1.1679 
2025-08-11 04:07:22,045 - INFO - Epoch: 176/500, Iter: 9/119 -- train_loss: 1.1758 
2025-08-11 04:07:28,233 - INFO - Epoch: 176/500, Iter: 10/119 -- train_loss: 1.1704 
2025-08-11 04:07:28,754 - INFO - Epoch: 176/500, Iter: 11/119 -- train_loss: 1.1219 
2025-08-11 04:07:29,206 - INFO - Epoch: 176/500, Iter: 12/119 -- train_loss: 1.1066 
2025-08-11 04:07:29,656 - INFO - Epoch: 176/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 04:15:43,432 - INFO - Epoch: 177/500, Iter: 1/119 -- train_loss: 1.1213 


[1/119]   1%|           [00:00<?]

2025-08-11 04:15:51,649 - INFO - Epoch: 177/500, Iter: 2/119 -- train_loss: 1.1327 
2025-08-11 04:15:52,152 - INFO - Epoch: 177/500, Iter: 3/119 -- train_loss: 1.1762 
2025-08-11 04:15:52,661 - INFO - Epoch: 177/500, Iter: 4/119 -- train_loss: 1.1649 
2025-08-11 04:15:53,134 - INFO - Epoch: 177/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 04:15:53,595 - INFO - Epoch: 177/500, Iter: 6/119 -- train_loss: 1.1743 
2025-08-11 04:15:54,049 - INFO - Epoch: 177/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 04:15:54,524 - INFO - Epoch: 177/500, Iter: 8/119 -- train_loss: 1.1745 
2025-08-11 04:15:55,017 - INFO - Epoch: 177/500, Iter: 9/119 -- train_loss: 1.1144 
2025-08-11 04:16:00,611 - INFO - Epoch: 177/500, Iter: 10/119 -- train_loss: 1.1479 
2025-08-11 04:16:08,191 - INFO - Epoch: 177/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 04:16:08,647 - INFO - Epoch: 177/500, Iter: 12/119 -- train_loss: 1.1722 
2025-08-11 04:16:09,127 - INFO - Epoch: 177/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 04:24:10,599 - INFO - Epoch: 178/500, Iter: 1/119 -- train_loss: 1.1521 


[1/119]   1%|           [00:00<?]

2025-08-11 04:24:16,595 - INFO - Epoch: 178/500, Iter: 2/119 -- train_loss: 1.1163 
2025-08-11 04:24:17,508 - INFO - Epoch: 178/500, Iter: 3/119 -- train_loss: 1.1508 
2025-08-11 04:24:17,946 - INFO - Epoch: 178/500, Iter: 4/119 -- train_loss: 1.1277 
2025-08-11 04:24:18,412 - INFO - Epoch: 178/500, Iter: 5/119 -- train_loss: 1.1750 
2025-08-11 04:24:18,917 - INFO - Epoch: 178/500, Iter: 6/119 -- train_loss: 1.1523 
2025-08-11 04:24:19,399 - INFO - Epoch: 178/500, Iter: 7/119 -- train_loss: 1.1612 
2025-08-11 04:24:19,884 - INFO - Epoch: 178/500, Iter: 8/119 -- train_loss: 1.1718 
2025-08-11 04:24:22,007 - INFO - Epoch: 178/500, Iter: 9/119 -- train_loss: 1.1797 
2025-08-11 04:24:27,775 - INFO - Epoch: 178/500, Iter: 10/119 -- train_loss: 1.1446 
2025-08-11 04:24:29,126 - INFO - Epoch: 178/500, Iter: 11/119 -- train_loss: 1.1617 
2025-08-11 04:24:29,630 - INFO - Epoch: 178/500, Iter: 12/119 -- train_loss: 1.1769 
2025-08-11 04:24:30,375 - INFO - Epoch: 178/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 04:32:45,226 - INFO - Epoch: 179/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 04:32:48,767 - INFO - Epoch: 179/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 04:32:50,351 - INFO - Epoch: 179/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 04:32:53,945 - INFO - Epoch: 179/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 04:32:59,767 - INFO - Epoch: 179/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 04:33:00,253 - INFO - Epoch: 179/500, Iter: 6/119 -- train_loss: 1.1750 
2025-08-11 04:33:00,747 - INFO - Epoch: 179/500, Iter: 7/119 -- train_loss: 1.1288 
2025-08-11 04:33:01,242 - INFO - Epoch: 179/500, Iter: 8/119 -- train_loss: 1.1725 
2025-08-11 04:33:02,372 - INFO - Epoch: 179/500, Iter: 9/119 -- train_loss: 1.1690 
2025-08-11 04:33:03,217 - INFO - Epoch: 179/500, Iter: 10/119 -- train_loss: 1.1614 
2025-08-11 04:33:03,668 - INFO - Epoch: 179/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 04:33:04,190 - INFO - Epoch: 179/500, Iter: 12/119 -- train_loss: 1.1641 
2025-08-11 04:33:14,535 - INFO - Epoch: 179/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 04:41:36,593 - INFO - Epoch: 180/500, Iter: 1/119 -- train_loss: 1.0200 


[1/119]   1%|           [00:00<?]

2025-08-11 04:41:37,108 - INFO - Epoch: 180/500, Iter: 2/119 -- train_loss: 1.1789 
2025-08-11 04:41:37,628 - INFO - Epoch: 180/500, Iter: 3/119 -- train_loss: 1.1776 
2025-08-11 04:41:38,116 - INFO - Epoch: 180/500, Iter: 4/119 -- train_loss: 1.1433 
2025-08-11 04:41:38,586 - INFO - Epoch: 180/500, Iter: 5/119 -- train_loss: 1.1718 
2025-08-11 04:41:42,429 - INFO - Epoch: 180/500, Iter: 6/119 -- train_loss: 1.0547 
2025-08-11 04:41:42,868 - INFO - Epoch: 180/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 04:41:43,577 - INFO - Epoch: 180/500, Iter: 8/119 -- train_loss: 1.1719 
2025-08-11 04:41:53,002 - INFO - Epoch: 180/500, Iter: 9/119 -- train_loss: 1.1770 
2025-08-11 04:41:53,516 - INFO - Epoch: 180/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-11 04:41:53,991 - INFO - Epoch: 180/500, Iter: 11/119 -- train_loss: 1.1304 
2025-08-11 04:41:54,446 - INFO - Epoch: 180/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 04:41:54,932 - INFO - Epoch: 180/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 04:50:39,973 - INFO - Epoch: 181/500, Iter: 1/119 -- train_loss: 1.1515 


[1/119]   1%|           [00:00<?]

2025-08-11 04:50:40,473 - INFO - Epoch: 181/500, Iter: 2/119 -- train_loss: 1.1109 
2025-08-11 04:50:40,924 - INFO - Epoch: 181/500, Iter: 3/119 -- train_loss: 1.1748 
2025-08-11 04:50:41,362 - INFO - Epoch: 181/500, Iter: 4/119 -- train_loss: 1.1616 
2025-08-11 04:50:41,825 - INFO - Epoch: 181/500, Iter: 5/119 -- train_loss: 1.1699 
2025-08-11 04:50:42,315 - INFO - Epoch: 181/500, Iter: 6/119 -- train_loss: 1.1414 
2025-08-11 04:50:42,889 - INFO - Epoch: 181/500, Iter: 7/119 -- train_loss: 1.1749 
2025-08-11 04:50:43,423 - INFO - Epoch: 181/500, Iter: 8/119 -- train_loss: 1.1227 
2025-08-11 04:50:50,338 - INFO - Epoch: 181/500, Iter: 9/119 -- train_loss: 1.1791 
2025-08-11 04:50:51,988 - INFO - Epoch: 181/500, Iter: 10/119 -- train_loss: 1.1736 
2025-08-11 04:50:52,496 - INFO - Epoch: 181/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 04:50:52,961 - INFO - Epoch: 181/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 04:50:53,472 - INFO - Epoch: 181/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 04:59:28,213 - INFO - Epoch: 182/500, Iter: 1/119 -- train_loss: 1.1340 


[1/119]   1%|           [00:00<?]

2025-08-11 04:59:45,178 - INFO - Epoch: 182/500, Iter: 2/119 -- train_loss: 1.1721 
2025-08-11 04:59:45,700 - INFO - Epoch: 182/500, Iter: 3/119 -- train_loss: 1.1525 
2025-08-11 04:59:46,239 - INFO - Epoch: 182/500, Iter: 4/119 -- train_loss: 1.1748 
2025-08-11 04:59:46,725 - INFO - Epoch: 182/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 04:59:47,234 - INFO - Epoch: 182/500, Iter: 6/119 -- train_loss: 1.1743 
2025-08-11 04:59:47,751 - INFO - Epoch: 182/500, Iter: 7/119 -- train_loss: 1.1686 
2025-08-11 04:59:48,266 - INFO - Epoch: 182/500, Iter: 8/119 -- train_loss: 1.1823 
2025-08-11 04:59:48,762 - INFO - Epoch: 182/500, Iter: 9/119 -- train_loss: 1.1585 
2025-08-11 04:59:56,206 - INFO - Epoch: 182/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 04:59:56,686 - INFO - Epoch: 182/500, Iter: 11/119 -- train_loss: 1.1421 
2025-08-11 04:59:57,152 - INFO - Epoch: 182/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 04:59:57,647 - INFO - Epoch: 182/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 05:08:07,813 - INFO - Epoch: 183/500, Iter: 1/119 -- train_loss: 1.1745 


[1/119]   1%|           [00:00<?]

2025-08-11 05:08:08,364 - INFO - Epoch: 183/500, Iter: 2/119 -- train_loss: 1.1745 
2025-08-11 05:08:08,883 - INFO - Epoch: 183/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 05:08:09,385 - INFO - Epoch: 183/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 05:08:09,916 - INFO - Epoch: 183/500, Iter: 5/119 -- train_loss: 1.1721 
2025-08-11 05:08:10,427 - INFO - Epoch: 183/500, Iter: 6/119 -- train_loss: 1.1638 
2025-08-11 05:08:10,937 - INFO - Epoch: 183/500, Iter: 7/119 -- train_loss: 1.1648 
2025-08-11 05:08:11,427 - INFO - Epoch: 183/500, Iter: 8/119 -- train_loss: 1.1363 
2025-08-11 05:08:19,200 - INFO - Epoch: 183/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 05:08:19,706 - INFO - Epoch: 183/500, Iter: 10/119 -- train_loss: 1.1451 
2025-08-11 05:08:20,174 - INFO - Epoch: 183/500, Iter: 11/119 -- train_loss: 1.1750 
2025-08-11 05:08:20,664 - INFO - Epoch: 183/500, Iter: 12/119 -- train_loss: 1.1679 
2025-08-11 05:08:21,144 - INFO - Epoch: 183/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 05:16:58,789 - INFO - Epoch: 184/500, Iter: 1/119 -- train_loss: 1.0973 


[1/119]   1%|           [00:00<?]

2025-08-11 05:17:03,605 - INFO - Epoch: 184/500, Iter: 2/119 -- train_loss: 1.1768 
2025-08-11 05:17:15,988 - INFO - Epoch: 184/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 05:17:16,427 - INFO - Epoch: 184/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 05:17:16,924 - INFO - Epoch: 184/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 05:17:17,388 - INFO - Epoch: 184/500, Iter: 6/119 -- train_loss: 1.1687 
2025-08-11 05:17:17,900 - INFO - Epoch: 184/500, Iter: 7/119 -- train_loss: 1.1068 
2025-08-11 05:17:18,373 - INFO - Epoch: 184/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 05:17:18,840 - INFO - Epoch: 184/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 05:17:22,968 - INFO - Epoch: 184/500, Iter: 10/119 -- train_loss: 1.1805 
2025-08-11 05:17:32,826 - INFO - Epoch: 184/500, Iter: 11/119 -- train_loss: 1.1526 
2025-08-11 05:17:35,804 - INFO - Epoch: 184/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 05:17:36,271 - INFO - Epoch: 184/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 05:26:19,632 - INFO - Epoch: 185/500, Iter: 1/119 -- train_loss: 1.1637 


[1/119]   1%|           [00:00<?]

2025-08-11 05:26:21,515 - INFO - Epoch: 185/500, Iter: 2/119 -- train_loss: 1.1236 
2025-08-11 05:26:23,399 - INFO - Epoch: 185/500, Iter: 3/119 -- train_loss: 1.1651 
2025-08-11 05:26:25,240 - INFO - Epoch: 185/500, Iter: 4/119 -- train_loss: 0.9321 
2025-08-11 05:26:27,150 - INFO - Epoch: 185/500, Iter: 5/119 -- train_loss: 1.1652 
2025-08-11 05:26:29,021 - INFO - Epoch: 185/500, Iter: 6/119 -- train_loss: 1.0936 
2025-08-11 05:26:30,848 - INFO - Epoch: 185/500, Iter: 7/119 -- train_loss: 1.1327 
2025-08-11 05:26:32,748 - INFO - Epoch: 185/500, Iter: 8/119 -- train_loss: 1.1645 
2025-08-11 05:26:34,648 - INFO - Epoch: 185/500, Iter: 9/119 -- train_loss: 1.1681 
2025-08-11 05:26:36,608 - INFO - Epoch: 185/500, Iter: 10/119 -- train_loss: 1.1865 
2025-08-11 05:26:38,501 - INFO - Epoch: 185/500, Iter: 11/119 -- train_loss: 1.1785 
2025-08-11 05:26:40,391 - INFO - Epoch: 185/500, Iter: 12/119 -- train_loss: 1.1591 
2025-08-11 05:26:42,304 - INFO - Epoch: 185/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 05:35:46,163 - INFO - Epoch: 186/500, Iter: 1/119 -- train_loss: 1.1724 


[1/119]   1%|           [00:00<?]

2025-08-11 05:35:48,098 - INFO - Epoch: 186/500, Iter: 2/119 -- train_loss: 1.1762 
2025-08-11 05:35:49,947 - INFO - Epoch: 186/500, Iter: 3/119 -- train_loss: 1.1673 
2025-08-11 05:35:51,822 - INFO - Epoch: 186/500, Iter: 4/119 -- train_loss: 1.1600 
2025-08-11 05:35:53,675 - INFO - Epoch: 186/500, Iter: 5/119 -- train_loss: 1.1440 
2025-08-11 05:35:55,518 - INFO - Epoch: 186/500, Iter: 6/119 -- train_loss: 1.1644 
2025-08-11 05:35:57,358 - INFO - Epoch: 186/500, Iter: 7/119 -- train_loss: 1.1073 
2025-08-11 05:35:59,193 - INFO - Epoch: 186/500, Iter: 8/119 -- train_loss: 1.1671 
2025-08-11 05:36:01,105 - INFO - Epoch: 186/500, Iter: 9/119 -- train_loss: 1.1590 
2025-08-11 05:36:02,946 - INFO - Epoch: 186/500, Iter: 10/119 -- train_loss: 0.9724 
2025-08-11 05:36:04,824 - INFO - Epoch: 186/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 05:36:06,697 - INFO - Epoch: 186/500, Iter: 12/119 -- train_loss: 1.1280 
2025-08-11 05:36:08,556 - INFO - Epoch: 186/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 05:44:50,770 - INFO - Epoch: 187/500, Iter: 1/119 -- train_loss: 1.1793 


[1/119]   1%|           [00:00<?]

2025-08-11 05:44:52,677 - INFO - Epoch: 187/500, Iter: 2/119 -- train_loss: 1.1746 
2025-08-11 05:44:54,599 - INFO - Epoch: 187/500, Iter: 3/119 -- train_loss: 1.1690 
2025-08-11 05:44:56,888 - INFO - Epoch: 187/500, Iter: 4/119 -- train_loss: 1.1013 
2025-08-11 05:44:58,755 - INFO - Epoch: 187/500, Iter: 5/119 -- train_loss: 1.1780 
2025-08-11 05:45:07,076 - INFO - Epoch: 187/500, Iter: 6/119 -- train_loss: 1.1771 
2025-08-11 05:45:08,919 - INFO - Epoch: 187/500, Iter: 7/119 -- train_loss: 1.1754 
2025-08-11 05:45:10,792 - INFO - Epoch: 187/500, Iter: 8/119 -- train_loss: 1.1746 
2025-08-11 05:45:12,658 - INFO - Epoch: 187/500, Iter: 9/119 -- train_loss: 1.1746 
2025-08-11 05:45:14,503 - INFO - Epoch: 187/500, Iter: 10/119 -- train_loss: 1.1747 
2025-08-11 05:45:16,352 - INFO - Epoch: 187/500, Iter: 11/119 -- train_loss: 1.0788 
2025-08-11 05:45:18,238 - INFO - Epoch: 187/500, Iter: 12/119 -- train_loss: 1.1458 
2025-08-11 05:45:20,112 - INFO - Epoch: 187/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 08:46:48,762 - INFO - Epoch: 188/500, Iter: 1/119 -- train_loss: 1.1854 


[1/119]   1%|           [00:00<?]

2025-08-11 08:46:50,546 - INFO - Epoch: 188/500, Iter: 2/119 -- train_loss: 1.1709 
2025-08-11 08:46:53,876 - INFO - Epoch: 188/500, Iter: 3/119 -- train_loss: 1.1853 
2025-08-11 08:46:55,621 - INFO - Epoch: 188/500, Iter: 4/119 -- train_loss: 1.1532 
2025-08-11 08:46:57,379 - INFO - Epoch: 188/500, Iter: 5/119 -- train_loss: 1.1695 
2025-08-11 08:46:59,120 - INFO - Epoch: 188/500, Iter: 6/119 -- train_loss: 1.1512 
2025-08-11 08:47:00,882 - INFO - Epoch: 188/500, Iter: 7/119 -- train_loss: 1.1760 
2025-08-11 08:47:02,631 - INFO - Epoch: 188/500, Iter: 8/119 -- train_loss: 0.9863 
2025-08-11 08:47:04,500 - INFO - Epoch: 188/500, Iter: 9/119 -- train_loss: 1.1791 
2025-08-11 08:47:06,219 - INFO - Epoch: 188/500, Iter: 10/119 -- train_loss: 1.1761 
2025-08-11 08:47:07,963 - INFO - Epoch: 188/500, Iter: 11/119 -- train_loss: 1.1429 
2025-08-11 08:47:09,696 - INFO - Epoch: 188/500, Iter: 12/119 -- train_loss: 1.1878 
2025-08-11 08:47:11,457 - INFO - Epoch: 188/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 08:52:25,874 - INFO - Epoch: 189/500, Iter: 1/119 -- train_loss: 1.1598 


[1/119]   1%|           [00:00<?]

2025-08-11 08:52:31,488 - INFO - Epoch: 189/500, Iter: 2/119 -- train_loss: 1.1559 
2025-08-11 08:52:33,984 - INFO - Epoch: 189/500, Iter: 3/119 -- train_loss: 1.1761 
2025-08-11 08:52:35,734 - INFO - Epoch: 189/500, Iter: 4/119 -- train_loss: 1.1647 
2025-08-11 08:52:37,467 - INFO - Epoch: 189/500, Iter: 5/119 -- train_loss: 1.1252 
2025-08-11 08:52:39,205 - INFO - Epoch: 189/500, Iter: 6/119 -- train_loss: 1.1755 
2025-08-11 08:52:40,955 - INFO - Epoch: 189/500, Iter: 7/119 -- train_loss: 1.1660 
2025-08-11 08:52:42,683 - INFO - Epoch: 189/500, Iter: 8/119 -- train_loss: 1.1794 
2025-08-11 08:52:44,466 - INFO - Epoch: 189/500, Iter: 9/119 -- train_loss: 1.1744 
2025-08-11 08:52:46,200 - INFO - Epoch: 189/500, Iter: 10/119 -- train_loss: 1.1747 
2025-08-11 08:52:47,948 - INFO - Epoch: 189/500, Iter: 11/119 -- train_loss: 1.1571 
2025-08-11 08:52:49,666 - INFO - Epoch: 189/500, Iter: 12/119 -- train_loss: 1.1728 
2025-08-11 08:52:51,382 - INFO - Epoch: 189/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 08:58:07,680 - INFO - Epoch: 190/500, Iter: 1/119 -- train_loss: 1.1707 


[1/119]   1%|           [00:00<?]

2025-08-11 08:58:09,443 - INFO - Epoch: 190/500, Iter: 2/119 -- train_loss: 1.1674 
2025-08-11 08:58:11,191 - INFO - Epoch: 190/500, Iter: 3/119 -- train_loss: 1.1458 
2025-08-11 08:58:12,957 - INFO - Epoch: 190/500, Iter: 4/119 -- train_loss: 1.1647 
2025-08-11 08:58:14,723 - INFO - Epoch: 190/500, Iter: 5/119 -- train_loss: 1.1587 
2025-08-11 08:58:16,476 - INFO - Epoch: 190/500, Iter: 6/119 -- train_loss: 1.1489 
2025-08-11 08:58:18,225 - INFO - Epoch: 190/500, Iter: 7/119 -- train_loss: 1.1744 
2025-08-11 08:58:19,994 - INFO - Epoch: 190/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-11 08:58:21,800 - INFO - Epoch: 190/500, Iter: 9/119 -- train_loss: 1.1530 
2025-08-11 08:58:23,573 - INFO - Epoch: 190/500, Iter: 10/119 -- train_loss: 1.1493 
2025-08-11 08:58:25,357 - INFO - Epoch: 190/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 08:58:27,088 - INFO - Epoch: 190/500, Iter: 12/119 -- train_loss: 1.1745 
2025-08-11 08:58:28,835 - INFO - Epoch: 190/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:03:57,360 - INFO - Epoch: 191/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 09:03:59,117 - INFO - Epoch: 191/500, Iter: 2/119 -- train_loss: 1.1517 
2025-08-11 09:04:01,496 - INFO - Epoch: 191/500, Iter: 3/119 -- train_loss: 1.1705 
2025-08-11 09:04:03,244 - INFO - Epoch: 191/500, Iter: 4/119 -- train_loss: 1.0603 
2025-08-11 09:04:04,977 - INFO - Epoch: 191/500, Iter: 5/119 -- train_loss: 1.1798 
2025-08-11 09:04:06,726 - INFO - Epoch: 191/500, Iter: 6/119 -- train_loss: 1.1745 
2025-08-11 09:04:08,443 - INFO - Epoch: 191/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 09:04:10,175 - INFO - Epoch: 191/500, Iter: 8/119 -- train_loss: 1.1557 
2025-08-11 09:04:11,942 - INFO - Epoch: 191/500, Iter: 9/119 -- train_loss: 1.1460 
2025-08-11 09:04:13,679 - INFO - Epoch: 191/500, Iter: 10/119 -- train_loss: 1.1530 
2025-08-11 09:04:15,442 - INFO - Epoch: 191/500, Iter: 11/119 -- train_loss: 1.1748 
2025-08-11 09:04:17,172 - INFO - Epoch: 191/500, Iter: 12/119 -- train_loss: 1.1701 
2025-08-11 09:04:18,875 - INFO - Epoch: 191/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:09:30,626 - INFO - Epoch: 192/500, Iter: 1/119 -- train_loss: 1.1779 


[1/119]   1%|           [00:00<?]

2025-08-11 09:09:32,385 - INFO - Epoch: 192/500, Iter: 2/119 -- train_loss: 1.1573 
2025-08-11 09:09:34,124 - INFO - Epoch: 192/500, Iter: 3/119 -- train_loss: 1.1011 
2025-08-11 09:09:35,879 - INFO - Epoch: 192/500, Iter: 4/119 -- train_loss: 1.1686 
2025-08-11 09:09:37,651 - INFO - Epoch: 192/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 09:09:39,373 - INFO - Epoch: 192/500, Iter: 6/119 -- train_loss: 1.1370 
2025-08-11 09:09:41,086 - INFO - Epoch: 192/500, Iter: 7/119 -- train_loss: 1.1749 
2025-08-11 09:09:42,811 - INFO - Epoch: 192/500, Iter: 8/119 -- train_loss: 1.1785 
2025-08-11 09:09:44,556 - INFO - Epoch: 192/500, Iter: 9/119 -- train_loss: 1.0646 
2025-08-11 09:09:46,316 - INFO - Epoch: 192/500, Iter: 10/119 -- train_loss: 1.0086 
2025-08-11 09:09:48,022 - INFO - Epoch: 192/500, Iter: 11/119 -- train_loss: 1.1644 
2025-08-11 09:09:49,780 - INFO - Epoch: 192/500, Iter: 12/119 -- train_loss: 1.1589 
2025-08-11 09:09:51,526 - INFO - Epoch: 192/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:15:06,623 - INFO - Epoch: 193/500, Iter: 1/119 -- train_loss: 1.1481 


[1/119]   1%|           [00:00<?]

2025-08-11 09:15:11,028 - INFO - Epoch: 193/500, Iter: 2/119 -- train_loss: 1.1285 
2025-08-11 09:15:12,770 - INFO - Epoch: 193/500, Iter: 3/119 -- train_loss: 1.1787 
2025-08-11 09:15:15,292 - INFO - Epoch: 193/500, Iter: 4/119 -- train_loss: 1.1591 
2025-08-11 09:15:17,039 - INFO - Epoch: 193/500, Iter: 5/119 -- train_loss: 1.1327 
2025-08-11 09:15:18,785 - INFO - Epoch: 193/500, Iter: 6/119 -- train_loss: 1.0504 
2025-08-11 09:15:20,515 - INFO - Epoch: 193/500, Iter: 7/119 -- train_loss: 1.1750 
2025-08-11 09:15:22,268 - INFO - Epoch: 193/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 09:15:24,033 - INFO - Epoch: 193/500, Iter: 9/119 -- train_loss: 1.1442 
2025-08-11 09:15:25,789 - INFO - Epoch: 193/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 09:15:27,585 - INFO - Epoch: 193/500, Iter: 11/119 -- train_loss: 1.1330 
2025-08-11 09:15:29,329 - INFO - Epoch: 193/500, Iter: 12/119 -- train_loss: 1.1692 
2025-08-11 09:15:31,055 - INFO - Epoch: 193/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:20:00,954 - INFO - Epoch: 194/500, Iter: 1/119 -- train_loss: 1.1494 


[1/119]   1%|           [00:00<?]

2025-08-11 09:20:01,261 - INFO - Epoch: 194/500, Iter: 2/119 -- train_loss: 1.1758 
2025-08-11 09:20:06,506 - INFO - Epoch: 194/500, Iter: 3/119 -- train_loss: 0.9179 
2025-08-11 09:20:06,841 - INFO - Epoch: 194/500, Iter: 4/119 -- train_loss: 1.1366 
2025-08-11 09:20:07,139 - INFO - Epoch: 194/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 09:20:07,468 - INFO - Epoch: 194/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 09:20:17,336 - INFO - Epoch: 194/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 09:20:17,662 - INFO - Epoch: 194/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 09:20:17,966 - INFO - Epoch: 194/500, Iter: 9/119 -- train_loss: 1.1714 
2025-08-11 09:20:18,280 - INFO - Epoch: 194/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-11 09:20:18,577 - INFO - Epoch: 194/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 09:20:18,921 - INFO - Epoch: 194/500, Iter: 12/119 -- train_loss: 1.1029 
2025-08-11 09:20:19,239 - INFO - Epoch: 194/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:23:59,457 - INFO - Epoch: 195/500, Iter: 1/119 -- train_loss: 1.1490 


[1/119]   1%|           [00:00<?]

2025-08-11 09:23:59,754 - INFO - Epoch: 195/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 09:23:59,999 - INFO - Epoch: 195/500, Iter: 3/119 -- train_loss: 1.1400 
2025-08-11 09:24:00,292 - INFO - Epoch: 195/500, Iter: 4/119 -- train_loss: 1.1634 
2025-08-11 09:24:00,603 - INFO - Epoch: 195/500, Iter: 5/119 -- train_loss: 1.1566 
2025-08-11 09:24:00,896 - INFO - Epoch: 195/500, Iter: 6/119 -- train_loss: 1.1750 
2025-08-11 09:24:01,190 - INFO - Epoch: 195/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 09:24:01,527 - INFO - Epoch: 195/500, Iter: 8/119 -- train_loss: 1.1694 
2025-08-11 09:24:05,646 - INFO - Epoch: 195/500, Iter: 9/119 -- train_loss: 1.1154 
2025-08-11 09:24:05,940 - INFO - Epoch: 195/500, Iter: 10/119 -- train_loss: 1.1422 
2025-08-11 09:24:06,233 - INFO - Epoch: 195/500, Iter: 11/119 -- train_loss: 1.0067 
2025-08-11 09:24:06,511 - INFO - Epoch: 195/500, Iter: 12/119 -- train_loss: 1.1762 
2025-08-11 09:24:06,818 - INFO - Epoch: 195/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:27:27,832 - INFO - Epoch: 196/500, Iter: 1/119 -- train_loss: 1.1751 


[1/119]   1%|           [00:00<?]

2025-08-11 09:27:28,113 - INFO - Epoch: 196/500, Iter: 2/119 -- train_loss: 1.1698 
2025-08-11 09:27:28,484 - INFO - Epoch: 196/500, Iter: 3/119 -- train_loss: 1.0690 
2025-08-11 09:27:28,766 - INFO - Epoch: 196/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 09:27:35,597 - INFO - Epoch: 196/500, Iter: 5/119 -- train_loss: 1.1541 
2025-08-11 09:27:35,889 - INFO - Epoch: 196/500, Iter: 6/119 -- train_loss: 1.1536 
2025-08-11 09:27:36,220 - INFO - Epoch: 196/500, Iter: 7/119 -- train_loss: 1.1722 
2025-08-11 09:27:36,524 - INFO - Epoch: 196/500, Iter: 8/119 -- train_loss: 1.0925 
2025-08-11 09:27:36,846 - INFO - Epoch: 196/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 09:27:37,171 - INFO - Epoch: 196/500, Iter: 10/119 -- train_loss: 1.1670 
2025-08-11 09:27:39,128 - INFO - Epoch: 196/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 09:27:39,425 - INFO - Epoch: 196/500, Iter: 12/119 -- train_loss: 1.1746 
2025-08-11 09:27:43,592 - INFO - Epoch: 196/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:31:04,526 - INFO - Epoch: 197/500, Iter: 1/119 -- train_loss: 1.1146 


[1/119]   1%|           [00:00<?]

2025-08-11 09:31:04,823 - INFO - Epoch: 197/500, Iter: 2/119 -- train_loss: 1.1812 
2025-08-11 09:31:05,122 - INFO - Epoch: 197/500, Iter: 3/119 -- train_loss: 1.1732 
2025-08-11 09:31:05,406 - INFO - Epoch: 197/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-11 09:31:05,711 - INFO - Epoch: 197/500, Iter: 5/119 -- train_loss: 1.1666 
2025-08-11 09:31:05,960 - INFO - Epoch: 197/500, Iter: 6/119 -- train_loss: 1.1728 
2025-08-11 09:31:06,241 - INFO - Epoch: 197/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 09:31:06,499 - INFO - Epoch: 197/500, Iter: 8/119 -- train_loss: 1.1672 
2025-08-11 09:31:12,559 - INFO - Epoch: 197/500, Iter: 9/119 -- train_loss: 1.1687 
2025-08-11 09:31:12,856 - INFO - Epoch: 197/500, Iter: 10/119 -- train_loss: 1.1749 
2025-08-11 09:31:13,114 - INFO - Epoch: 197/500, Iter: 11/119 -- train_loss: 1.1716 
2025-08-11 09:31:13,431 - INFO - Epoch: 197/500, Iter: 12/119 -- train_loss: 1.1744 
2025-08-11 09:31:13,715 - INFO - Epoch: 197/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:34:30,254 - INFO - Epoch: 198/500, Iter: 1/119 -- train_loss: 1.1546 


[1/119]   1%|           [00:00<?]

2025-08-11 09:34:33,426 - INFO - Epoch: 198/500, Iter: 2/119 -- train_loss: 1.1715 
2025-08-11 09:34:33,732 - INFO - Epoch: 198/500, Iter: 3/119 -- train_loss: 1.1744 
2025-08-11 09:34:34,019 - INFO - Epoch: 198/500, Iter: 4/119 -- train_loss: 1.1698 
2025-08-11 09:34:34,308 - INFO - Epoch: 198/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 09:34:34,581 - INFO - Epoch: 198/500, Iter: 6/119 -- train_loss: 1.1481 
2025-08-11 09:34:34,871 - INFO - Epoch: 198/500, Iter: 7/119 -- train_loss: 1.1433 
2025-08-11 09:34:35,203 - INFO - Epoch: 198/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 09:34:35,591 - INFO - Epoch: 198/500, Iter: 9/119 -- train_loss: 1.1548 
2025-08-11 09:34:40,641 - INFO - Epoch: 198/500, Iter: 10/119 -- train_loss: 1.1703 
2025-08-11 09:34:40,963 - INFO - Epoch: 198/500, Iter: 11/119 -- train_loss: 1.1634 
2025-08-11 09:34:41,265 - INFO - Epoch: 198/500, Iter: 12/119 -- train_loss: 1.1335 
2025-08-11 09:34:41,548 - INFO - Epoch: 198/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:38:10,211 - INFO - Epoch: 199/500, Iter: 1/119 -- train_loss: 1.1703 


[1/119]   1%|           [00:00<?]

2025-08-11 09:38:10,508 - INFO - Epoch: 199/500, Iter: 2/119 -- train_loss: 1.1744 
2025-08-11 09:38:10,854 - INFO - Epoch: 199/500, Iter: 3/119 -- train_loss: 1.1782 
2025-08-11 09:38:11,150 - INFO - Epoch: 199/500, Iter: 4/119 -- train_loss: 1.1023 
2025-08-11 09:38:11,466 - INFO - Epoch: 199/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 09:38:11,793 - INFO - Epoch: 199/500, Iter: 6/119 -- train_loss: 1.1246 
2025-08-11 09:38:12,102 - INFO - Epoch: 199/500, Iter: 7/119 -- train_loss: 1.1458 
2025-08-11 09:38:13,126 - INFO - Epoch: 199/500, Iter: 8/119 -- train_loss: 1.1725 
2025-08-11 09:38:14,616 - INFO - Epoch: 199/500, Iter: 9/119 -- train_loss: 1.1367 
2025-08-11 09:38:22,376 - INFO - Epoch: 199/500, Iter: 10/119 -- train_loss: 1.1824 
2025-08-11 09:38:22,660 - INFO - Epoch: 199/500, Iter: 11/119 -- train_loss: 1.1680 
2025-08-11 09:38:22,979 - INFO - Epoch: 199/500, Iter: 12/119 -- train_loss: 1.1560 
2025-08-11 09:38:23,316 - INFO - Epoch: 199/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:41:45,370 - INFO - Epoch: 200/500, Iter: 1/119 -- train_loss: 1.1802 


[1/119]   1%|           [00:00<?]

2025-08-11 09:41:45,723 - INFO - Epoch: 200/500, Iter: 2/119 -- train_loss: 1.1753 
2025-08-11 09:41:46,772 - INFO - Epoch: 200/500, Iter: 3/119 -- train_loss: 1.1764 
2025-08-11 09:41:47,052 - INFO - Epoch: 200/500, Iter: 4/119 -- train_loss: 1.1749 
2025-08-11 09:41:47,374 - INFO - Epoch: 200/500, Iter: 5/119 -- train_loss: 1.0925 
2025-08-11 09:41:50,884 - INFO - Epoch: 200/500, Iter: 6/119 -- train_loss: 1.1791 
2025-08-11 09:41:51,199 - INFO - Epoch: 200/500, Iter: 7/119 -- train_loss: 1.1757 
2025-08-11 09:41:51,477 - INFO - Epoch: 200/500, Iter: 8/119 -- train_loss: 1.1286 
2025-08-11 09:41:51,797 - INFO - Epoch: 200/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 09:41:53,813 - INFO - Epoch: 200/500, Iter: 10/119 -- train_loss: 1.1224 
2025-08-11 09:41:54,151 - INFO - Epoch: 200/500, Iter: 11/119 -- train_loss: 1.1613 
2025-08-11 09:41:54,458 - INFO - Epoch: 200/500, Iter: 12/119 -- train_loss: 1.1745 
2025-08-11 09:41:54,769 - INFO - Epoch: 200/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:45:33,522 - INFO - Epoch: 201/500, Iter: 1/119 -- train_loss: 1.1328 


[1/119]   1%|           [00:00<?]

2025-08-11 09:45:33,839 - INFO - Epoch: 201/500, Iter: 2/119 -- train_loss: 1.1687 
2025-08-11 09:45:34,163 - INFO - Epoch: 201/500, Iter: 3/119 -- train_loss: 1.1647 
2025-08-11 09:45:34,511 - INFO - Epoch: 201/500, Iter: 4/119 -- train_loss: 1.1606 
2025-08-11 09:45:35,437 - INFO - Epoch: 201/500, Iter: 5/119 -- train_loss: 1.1381 
2025-08-11 09:45:35,744 - INFO - Epoch: 201/500, Iter: 6/119 -- train_loss: 1.1382 
2025-08-11 09:45:41,913 - INFO - Epoch: 201/500, Iter: 7/119 -- train_loss: 1.1483 
2025-08-11 09:45:42,197 - INFO - Epoch: 201/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 09:45:42,523 - INFO - Epoch: 201/500, Iter: 9/119 -- train_loss: 0.9610 
2025-08-11 09:45:42,829 - INFO - Epoch: 201/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-11 09:45:43,123 - INFO - Epoch: 201/500, Iter: 11/119 -- train_loss: 1.1766 
2025-08-11 09:45:43,450 - INFO - Epoch: 201/500, Iter: 12/119 -- train_loss: 1.1574 
2025-08-11 09:45:43,782 - INFO - Epoch: 201/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:49:10,182 - INFO - Epoch: 202/500, Iter: 1/119 -- train_loss: 1.1712 


[1/119]   1%|           [00:00<?]

2025-08-11 09:49:17,772 - INFO - Epoch: 202/500, Iter: 2/119 -- train_loss: 1.1709 
2025-08-11 09:49:18,045 - INFO - Epoch: 202/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 09:49:18,368 - INFO - Epoch: 202/500, Iter: 4/119 -- train_loss: 1.1662 
2025-08-11 09:49:20,931 - INFO - Epoch: 202/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-11 09:49:21,242 - INFO - Epoch: 202/500, Iter: 6/119 -- train_loss: 1.1447 
2025-08-11 09:49:21,531 - INFO - Epoch: 202/500, Iter: 7/119 -- train_loss: 1.1531 
2025-08-11 09:49:21,834 - INFO - Epoch: 202/500, Iter: 8/119 -- train_loss: 1.0488 
2025-08-11 09:49:22,153 - INFO - Epoch: 202/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 09:49:28,145 - INFO - Epoch: 202/500, Iter: 10/119 -- train_loss: 1.1745 
2025-08-11 09:49:28,404 - INFO - Epoch: 202/500, Iter: 11/119 -- train_loss: 1.1028 
2025-08-11 09:49:28,691 - INFO - Epoch: 202/500, Iter: 12/119 -- train_loss: 1.1522 
2025-08-11 09:49:28,990 - INFO - Epoch: 202/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:52:56,919 - INFO - Epoch: 203/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 09:52:57,591 - INFO - Epoch: 203/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 09:52:57,894 - INFO - Epoch: 203/500, Iter: 3/119 -- train_loss: 1.1744 
2025-08-11 09:52:58,202 - INFO - Epoch: 203/500, Iter: 4/119 -- train_loss: 1.0385 
2025-08-11 09:52:58,507 - INFO - Epoch: 203/500, Iter: 5/119 -- train_loss: 1.1794 
2025-08-11 09:52:59,331 - INFO - Epoch: 203/500, Iter: 6/119 -- train_loss: 1.1660 
2025-08-11 09:52:59,603 - INFO - Epoch: 203/500, Iter: 7/119 -- train_loss: 1.1594 
2025-08-11 09:53:02,847 - INFO - Epoch: 203/500, Iter: 8/119 -- train_loss: 1.1770 
2025-08-11 09:53:08,891 - INFO - Epoch: 203/500, Iter: 9/119 -- train_loss: 1.1812 
2025-08-11 09:53:09,134 - INFO - Epoch: 203/500, Iter: 10/119 -- train_loss: 1.1872 
2025-08-11 09:53:09,414 - INFO - Epoch: 203/500, Iter: 11/119 -- train_loss: 1.1594 
2025-08-11 09:53:09,745 - INFO - Epoch: 203/500, Iter: 12/119 -- train_loss: 1.1709 
2025-08-11 09:53:10,062 - INFO - Epoch: 203/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:56:35,251 - INFO - Epoch: 204/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 09:56:36,691 - INFO - Epoch: 204/500, Iter: 2/119 -- train_loss: 1.1744 
2025-08-11 09:56:37,013 - INFO - Epoch: 204/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 09:56:37,559 - INFO - Epoch: 204/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 09:56:37,837 - INFO - Epoch: 204/500, Iter: 5/119 -- train_loss: 1.1643 
2025-08-11 09:56:38,112 - INFO - Epoch: 204/500, Iter: 6/119 -- train_loss: 1.1205 
2025-08-11 09:56:38,405 - INFO - Epoch: 204/500, Iter: 7/119 -- train_loss: 1.1121 
2025-08-11 09:56:38,717 - INFO - Epoch: 204/500, Iter: 8/119 -- train_loss: 1.1723 
2025-08-11 09:56:39,208 - INFO - Epoch: 204/500, Iter: 9/119 -- train_loss: 1.1673 
2025-08-11 09:56:42,577 - INFO - Epoch: 204/500, Iter: 10/119 -- train_loss: 1.0870 
2025-08-11 09:56:43,471 - INFO - Epoch: 204/500, Iter: 11/119 -- train_loss: 1.1716 
2025-08-11 09:56:47,004 - INFO - Epoch: 204/500, Iter: 12/119 -- train_loss: 1.1664 
2025-08-11 09:56:47,272 - INFO - Epoch: 204/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 09:59:56,816 - INFO - Epoch: 205/500, Iter: 1/119 -- train_loss: 1.1535 


[1/119]   1%|           [00:00<?]

2025-08-11 10:00:04,206 - INFO - Epoch: 205/500, Iter: 2/119 -- train_loss: 1.1784 
2025-08-11 10:00:04,540 - INFO - Epoch: 205/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 10:00:04,813 - INFO - Epoch: 205/500, Iter: 4/119 -- train_loss: 1.1017 
2025-08-11 10:00:05,103 - INFO - Epoch: 205/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 10:00:05,437 - INFO - Epoch: 205/500, Iter: 6/119 -- train_loss: 1.1678 
2025-08-11 10:00:05,715 - INFO - Epoch: 205/500, Iter: 7/119 -- train_loss: 1.1643 
2025-08-11 10:00:06,042 - INFO - Epoch: 205/500, Iter: 8/119 -- train_loss: 1.1740 
2025-08-11 10:00:06,339 - INFO - Epoch: 205/500, Iter: 9/119 -- train_loss: 1.1089 
2025-08-11 10:00:09,295 - INFO - Epoch: 205/500, Iter: 10/119 -- train_loss: 1.1748 
2025-08-11 10:00:09,611 - INFO - Epoch: 205/500, Iter: 11/119 -- train_loss: 1.0889 
2025-08-11 10:00:09,875 - INFO - Epoch: 205/500, Iter: 12/119 -- train_loss: 1.1670 
2025-08-11 10:00:10,160 - INFO - Epoch: 205/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:03:33,741 - INFO - Epoch: 206/500, Iter: 1/119 -- train_loss: 1.1506 


[1/119]   1%|           [00:00<?]

2025-08-11 10:03:36,045 - INFO - Epoch: 206/500, Iter: 2/119 -- train_loss: 1.1528 
2025-08-11 10:03:36,378 - INFO - Epoch: 206/500, Iter: 3/119 -- train_loss: 1.1812 
2025-08-11 10:03:39,325 - INFO - Epoch: 206/500, Iter: 4/119 -- train_loss: 1.1816 
2025-08-11 10:03:39,596 - INFO - Epoch: 206/500, Iter: 5/119 -- train_loss: 1.1770 
2025-08-11 10:03:41,610 - INFO - Epoch: 206/500, Iter: 6/119 -- train_loss: 1.1302 
2025-08-11 10:03:41,920 - INFO - Epoch: 206/500, Iter: 7/119 -- train_loss: 1.1744 
2025-08-11 10:03:42,208 - INFO - Epoch: 206/500, Iter: 8/119 -- train_loss: 1.0424 
2025-08-11 10:03:42,514 - INFO - Epoch: 206/500, Iter: 9/119 -- train_loss: 1.1745 
2025-08-11 10:03:42,826 - INFO - Epoch: 206/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 10:03:43,170 - INFO - Epoch: 206/500, Iter: 11/119 -- train_loss: 1.1748 
2025-08-11 10:03:47,695 - INFO - Epoch: 206/500, Iter: 12/119 -- train_loss: 1.1324 
2025-08-11 10:03:47,981 - INFO - Epoch: 206/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:07:21,430 - INFO - Epoch: 207/500, Iter: 1/119 -- train_loss: 1.1383 


[1/119]   1%|           [00:00<?]

2025-08-11 10:07:21,746 - INFO - Epoch: 207/500, Iter: 2/119 -- train_loss: 1.1321 
2025-08-11 10:07:22,049 - INFO - Epoch: 207/500, Iter: 3/119 -- train_loss: 1.1753 
2025-08-11 10:07:23,699 - INFO - Epoch: 207/500, Iter: 4/119 -- train_loss: 1.1799 
2025-08-11 10:07:23,985 - INFO - Epoch: 207/500, Iter: 5/119 -- train_loss: 1.1472 
2025-08-11 10:07:28,247 - INFO - Epoch: 207/500, Iter: 6/119 -- train_loss: 1.1735 
2025-08-11 10:07:28,528 - INFO - Epoch: 207/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 10:07:28,832 - INFO - Epoch: 207/500, Iter: 8/119 -- train_loss: 1.1538 
2025-08-11 10:07:33,595 - INFO - Epoch: 207/500, Iter: 9/119 -- train_loss: 1.1582 
2025-08-11 10:07:33,916 - INFO - Epoch: 207/500, Iter: 10/119 -- train_loss: 1.1729 
2025-08-11 10:07:34,215 - INFO - Epoch: 207/500, Iter: 11/119 -- train_loss: 1.1752 
2025-08-11 10:07:34,536 - INFO - Epoch: 207/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 10:07:34,841 - INFO - Epoch: 207/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:11:07,943 - INFO - Epoch: 208/500, Iter: 1/119 -- train_loss: 1.1777 


[1/119]   1%|           [00:00<?]

2025-08-11 10:11:10,975 - INFO - Epoch: 208/500, Iter: 2/119 -- train_loss: 1.1358 
2025-08-11 10:11:13,578 - INFO - Epoch: 208/500, Iter: 3/119 -- train_loss: 1.1543 
2025-08-11 10:11:15,177 - INFO - Epoch: 208/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 10:11:15,461 - INFO - Epoch: 208/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 10:11:15,792 - INFO - Epoch: 208/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 10:11:16,081 - INFO - Epoch: 208/500, Iter: 7/119 -- train_loss: 1.1613 
2025-08-11 10:11:16,377 - INFO - Epoch: 208/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 10:11:16,674 - INFO - Epoch: 208/500, Iter: 9/119 -- train_loss: 1.1697 
2025-08-11 10:11:22,166 - INFO - Epoch: 208/500, Iter: 10/119 -- train_loss: 1.1635 
2025-08-11 10:11:22,444 - INFO - Epoch: 208/500, Iter: 11/119 -- train_loss: 1.1700 
2025-08-11 10:11:24,763 - INFO - Epoch: 208/500, Iter: 12/119 -- train_loss: 1.1556 
2025-08-11 10:11:25,019 - INFO - Epoch: 208/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:15:05,532 - INFO - Epoch: 209/500, Iter: 1/119 -- train_loss: 1.1729 


[1/119]   1%|           [00:00<?]

2025-08-11 10:15:05,812 - INFO - Epoch: 209/500, Iter: 2/119 -- train_loss: 1.1533 
2025-08-11 10:15:06,112 - INFO - Epoch: 209/500, Iter: 3/119 -- train_loss: 1.1744 
2025-08-11 10:15:06,428 - INFO - Epoch: 209/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 10:15:06,722 - INFO - Epoch: 209/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 10:15:07,038 - INFO - Epoch: 209/500, Iter: 6/119 -- train_loss: 1.1682 
2025-08-11 10:15:07,341 - INFO - Epoch: 209/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 10:15:07,612 - INFO - Epoch: 209/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 10:15:10,828 - INFO - Epoch: 209/500, Iter: 9/119 -- train_loss: 1.1561 
2025-08-11 10:15:12,253 - INFO - Epoch: 209/500, Iter: 10/119 -- train_loss: 1.1675 
2025-08-11 10:15:12,537 - INFO - Epoch: 209/500, Iter: 11/119 -- train_loss: 1.1745 
2025-08-11 10:15:12,815 - INFO - Epoch: 209/500, Iter: 12/119 -- train_loss: 1.1302 
2025-08-11 10:15:13,090 - INFO - Epoch: 209/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:18:56,014 - INFO - Epoch: 210/500, Iter: 1/119 -- train_loss: 1.1702 


[1/119]   1%|           [00:00<?]

2025-08-11 10:19:00,804 - INFO - Epoch: 210/500, Iter: 2/119 -- train_loss: 1.1759 
2025-08-11 10:19:01,123 - INFO - Epoch: 210/500, Iter: 3/119 -- train_loss: 1.1766 
2025-08-11 10:19:03,502 - INFO - Epoch: 210/500, Iter: 4/119 -- train_loss: 1.1500 
2025-08-11 10:19:03,832 - INFO - Epoch: 210/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 10:19:04,166 - INFO - Epoch: 210/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 10:19:04,511 - INFO - Epoch: 210/500, Iter: 7/119 -- train_loss: 1.1764 
2025-08-11 10:19:04,804 - INFO - Epoch: 210/500, Iter: 8/119 -- train_loss: 1.1403 
2025-08-11 10:19:09,004 - INFO - Epoch: 210/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 10:19:12,599 - INFO - Epoch: 210/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 10:19:12,882 - INFO - Epoch: 210/500, Iter: 11/119 -- train_loss: 1.1702 
2025-08-11 10:19:13,185 - INFO - Epoch: 210/500, Iter: 12/119 -- train_loss: 1.1491 
2025-08-11 10:19:13,498 - INFO - Epoch: 210/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:22:41,763 - INFO - Epoch: 211/500, Iter: 1/119 -- train_loss: 1.1667 


[1/119]   1%|           [00:00<?]

2025-08-11 10:22:42,084 - INFO - Epoch: 211/500, Iter: 2/119 -- train_loss: 1.1694 
2025-08-11 10:22:43,695 - INFO - Epoch: 211/500, Iter: 3/119 -- train_loss: 1.1693 
2025-08-11 10:22:43,989 - INFO - Epoch: 211/500, Iter: 4/119 -- train_loss: 1.1396 
2025-08-11 10:22:44,241 - INFO - Epoch: 211/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-11 10:22:44,557 - INFO - Epoch: 211/500, Iter: 6/119 -- train_loss: 1.0253 
2025-08-11 10:22:44,864 - INFO - Epoch: 211/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 10:22:45,135 - INFO - Epoch: 211/500, Iter: 8/119 -- train_loss: 1.0830 
2025-08-11 10:22:50,138 - INFO - Epoch: 211/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 10:22:50,440 - INFO - Epoch: 211/500, Iter: 10/119 -- train_loss: 1.1524 
2025-08-11 10:22:52,640 - INFO - Epoch: 211/500, Iter: 11/119 -- train_loss: 1.1749 
2025-08-11 10:22:52,927 - INFO - Epoch: 211/500, Iter: 12/119 -- train_loss: 1.1656 
2025-08-11 10:22:53,196 - INFO - Epoch: 211/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:26:23,875 - INFO - Epoch: 212/500, Iter: 1/119 -- train_loss: 1.1751 


[1/119]   1%|           [00:00<?]

2025-08-11 10:26:24,187 - INFO - Epoch: 212/500, Iter: 2/119 -- train_loss: 1.1745 
2025-08-11 10:26:24,495 - INFO - Epoch: 212/500, Iter: 3/119 -- train_loss: 1.1750 
2025-08-11 10:26:24,825 - INFO - Epoch: 212/500, Iter: 4/119 -- train_loss: 1.1657 
2025-08-11 10:26:25,172 - INFO - Epoch: 212/500, Iter: 5/119 -- train_loss: 1.0522 
2025-08-11 10:26:25,458 - INFO - Epoch: 212/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 10:26:25,764 - INFO - Epoch: 212/500, Iter: 7/119 -- train_loss: 1.1389 
2025-08-11 10:26:28,975 - INFO - Epoch: 212/500, Iter: 8/119 -- train_loss: 1.1709 
2025-08-11 10:26:35,875 - INFO - Epoch: 212/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 10:26:36,162 - INFO - Epoch: 212/500, Iter: 10/119 -- train_loss: 1.1555 
2025-08-11 10:26:36,435 - INFO - Epoch: 212/500, Iter: 11/119 -- train_loss: 1.1016 
2025-08-11 10:26:36,708 - INFO - Epoch: 212/500, Iter: 12/119 -- train_loss: 1.1745 
2025-08-11 10:26:37,024 - INFO - Epoch: 212/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:30:09,659 - INFO - Epoch: 213/500, Iter: 1/119 -- train_loss: 0.9660 


[1/119]   1%|           [00:00<?]

2025-08-11 10:30:13,147 - INFO - Epoch: 213/500, Iter: 2/119 -- train_loss: 1.1170 
2025-08-11 10:30:13,421 - INFO - Epoch: 213/500, Iter: 3/119 -- train_loss: 1.1288 
2025-08-11 10:30:13,720 - INFO - Epoch: 213/500, Iter: 4/119 -- train_loss: 1.1589 
2025-08-11 10:30:13,998 - INFO - Epoch: 213/500, Iter: 5/119 -- train_loss: 1.1085 
2025-08-11 10:30:14,291 - INFO - Epoch: 213/500, Iter: 6/119 -- train_loss: 1.1802 
2025-08-11 10:30:14,604 - INFO - Epoch: 213/500, Iter: 7/119 -- train_loss: 1.1820 
2025-08-11 10:30:14,917 - INFO - Epoch: 213/500, Iter: 8/119 -- train_loss: 1.1707 
2025-08-11 10:30:17,517 - INFO - Epoch: 213/500, Iter: 9/119 -- train_loss: 1.1754 
2025-08-11 10:30:20,141 - INFO - Epoch: 213/500, Iter: 10/119 -- train_loss: 1.0414 
2025-08-11 10:30:20,463 - INFO - Epoch: 213/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 10:30:20,773 - INFO - Epoch: 213/500, Iter: 12/119 -- train_loss: 1.1568 
2025-08-11 10:30:21,057 - INFO - Epoch: 213/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:33:51,336 - INFO - Epoch: 214/500, Iter: 1/119 -- train_loss: 1.1504 


[1/119]   1%|           [00:00<?]

2025-08-11 10:33:51,659 - INFO - Epoch: 214/500, Iter: 2/119 -- train_loss: 1.1712 
2025-08-11 10:33:51,937 - INFO - Epoch: 214/500, Iter: 3/119 -- train_loss: 1.1759 
2025-08-11 10:33:52,261 - INFO - Epoch: 214/500, Iter: 4/119 -- train_loss: 1.1867 
2025-08-11 10:33:52,586 - INFO - Epoch: 214/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 10:33:52,875 - INFO - Epoch: 214/500, Iter: 6/119 -- train_loss: 1.1452 
2025-08-11 10:33:53,163 - INFO - Epoch: 214/500, Iter: 7/119 -- train_loss: 1.1708 
2025-08-11 10:33:53,953 - INFO - Epoch: 214/500, Iter: 8/119 -- train_loss: 1.0080 
2025-08-11 10:33:57,269 - INFO - Epoch: 214/500, Iter: 9/119 -- train_loss: 1.1642 
2025-08-11 10:33:57,602 - INFO - Epoch: 214/500, Iter: 10/119 -- train_loss: 1.1761 
2025-08-11 10:33:57,950 - INFO - Epoch: 214/500, Iter: 11/119 -- train_loss: 1.1753 
2025-08-11 10:33:58,253 - INFO - Epoch: 214/500, Iter: 12/119 -- train_loss: 1.1684 
2025-08-11 10:33:58,566 - INFO - Epoch: 214/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:37:28,296 - INFO - Epoch: 215/500, Iter: 1/119 -- train_loss: 1.1752 


[1/119]   1%|           [00:00<?]

2025-08-11 10:37:30,110 - INFO - Epoch: 215/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 10:37:30,398 - INFO - Epoch: 215/500, Iter: 3/119 -- train_loss: 1.1456 
2025-08-11 10:37:30,708 - INFO - Epoch: 215/500, Iter: 4/119 -- train_loss: 1.1676 
2025-08-11 10:37:31,036 - INFO - Epoch: 215/500, Iter: 5/119 -- train_loss: 1.1699 
2025-08-11 10:37:31,339 - INFO - Epoch: 215/500, Iter: 6/119 -- train_loss: 1.1750 
2025-08-11 10:37:39,903 - INFO - Epoch: 215/500, Iter: 7/119 -- train_loss: 1.1685 
2025-08-11 10:37:40,195 - INFO - Epoch: 215/500, Iter: 8/119 -- train_loss: 1.1724 
2025-08-11 10:37:40,502 - INFO - Epoch: 215/500, Iter: 9/119 -- train_loss: 1.1168 
2025-08-11 10:37:40,780 - INFO - Epoch: 215/500, Iter: 10/119 -- train_loss: 1.1755 
2025-08-11 10:37:41,077 - INFO - Epoch: 215/500, Iter: 11/119 -- train_loss: 1.1788 
2025-08-11 10:37:41,390 - INFO - Epoch: 215/500, Iter: 12/119 -- train_loss: 1.1699 
2025-08-11 10:37:41,714 - INFO - Epoch: 215/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:41:22,452 - INFO - Epoch: 216/500, Iter: 1/119 -- train_loss: 1.1625 


[1/119]   1%|           [00:00<?]

2025-08-11 10:41:25,397 - INFO - Epoch: 216/500, Iter: 2/119 -- train_loss: 1.1003 
2025-08-11 10:41:25,742 - INFO - Epoch: 216/500, Iter: 3/119 -- train_loss: 1.1730 
2025-08-11 10:41:27,496 - INFO - Epoch: 216/500, Iter: 4/119 -- train_loss: 1.1751 
2025-08-11 10:41:27,823 - INFO - Epoch: 216/500, Iter: 5/119 -- train_loss: 1.1394 
2025-08-11 10:41:28,130 - INFO - Epoch: 216/500, Iter: 6/119 -- train_loss: 1.1494 
2025-08-11 10:41:28,415 - INFO - Epoch: 216/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 10:41:28,728 - INFO - Epoch: 216/500, Iter: 8/119 -- train_loss: 1.1703 
2025-08-11 10:41:30,999 - INFO - Epoch: 216/500, Iter: 9/119 -- train_loss: 1.1777 
2025-08-11 10:41:32,933 - INFO - Epoch: 216/500, Iter: 10/119 -- train_loss: 1.1290 
2025-08-11 10:41:33,241 - INFO - Epoch: 216/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 10:41:35,712 - INFO - Epoch: 216/500, Iter: 12/119 -- train_loss: 1.1658 
2025-08-11 10:41:36,003 - INFO - Epoch: 216/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:45:06,398 - INFO - Epoch: 217/500, Iter: 1/119 -- train_loss: 1.0482 


[1/119]   1%|           [00:00<?]

2025-08-11 10:45:06,701 - INFO - Epoch: 217/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 10:45:08,050 - INFO - Epoch: 217/500, Iter: 3/119 -- train_loss: 1.1275 
2025-08-11 10:45:08,347 - INFO - Epoch: 217/500, Iter: 4/119 -- train_loss: 1.1622 
2025-08-11 10:45:08,624 - INFO - Epoch: 217/500, Iter: 5/119 -- train_loss: 1.1569 
2025-08-11 10:45:08,925 - INFO - Epoch: 217/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 10:45:12,056 - INFO - Epoch: 217/500, Iter: 7/119 -- train_loss: 1.1624 
2025-08-11 10:45:12,356 - INFO - Epoch: 217/500, Iter: 8/119 -- train_loss: 1.1506 
2025-08-11 10:45:12,664 - INFO - Epoch: 217/500, Iter: 9/119 -- train_loss: 1.1424 
2025-08-11 10:45:12,962 - INFO - Epoch: 217/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-11 10:45:13,531 - INFO - Epoch: 217/500, Iter: 11/119 -- train_loss: 1.1676 
2025-08-11 10:45:21,127 - INFO - Epoch: 217/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 10:45:21,407 - INFO - Epoch: 217/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:48:49,700 - INFO - Epoch: 218/500, Iter: 1/119 -- train_loss: 1.1428 


[1/119]   1%|           [00:00<?]

2025-08-11 10:48:50,047 - INFO - Epoch: 218/500, Iter: 2/119 -- train_loss: 1.0826 
2025-08-11 10:48:50,334 - INFO - Epoch: 218/500, Iter: 3/119 -- train_loss: 1.1773 
2025-08-11 10:48:50,662 - INFO - Epoch: 218/500, Iter: 4/119 -- train_loss: 1.1505 
2025-08-11 10:48:50,974 - INFO - Epoch: 218/500, Iter: 5/119 -- train_loss: 1.1720 
2025-08-11 10:48:51,317 - INFO - Epoch: 218/500, Iter: 6/119 -- train_loss: 1.1446 
2025-08-11 10:48:51,592 - INFO - Epoch: 218/500, Iter: 7/119 -- train_loss: 1.1744 
2025-08-11 10:48:51,909 - INFO - Epoch: 218/500, Iter: 8/119 -- train_loss: 1.1406 
2025-08-11 10:48:57,073 - INFO - Epoch: 218/500, Iter: 9/119 -- train_loss: 1.0696 
2025-08-11 10:48:58,092 - INFO - Epoch: 218/500, Iter: 10/119 -- train_loss: 1.1704 
2025-08-11 10:48:58,408 - INFO - Epoch: 218/500, Iter: 11/119 -- train_loss: 1.1549 
2025-08-11 10:48:58,692 - INFO - Epoch: 218/500, Iter: 12/119 -- train_loss: 1.1771 
2025-08-11 10:48:59,047 - INFO - Epoch: 218/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:52:40,223 - INFO - Epoch: 219/500, Iter: 1/119 -- train_loss: 1.1436 


[1/119]   1%|           [00:00<?]

2025-08-11 10:52:40,548 - INFO - Epoch: 219/500, Iter: 2/119 -- train_loss: 1.1593 
2025-08-11 10:52:40,878 - INFO - Epoch: 219/500, Iter: 3/119 -- train_loss: 0.9885 
2025-08-11 10:52:41,152 - INFO - Epoch: 219/500, Iter: 4/119 -- train_loss: 1.1746 
2025-08-11 10:52:41,435 - INFO - Epoch: 219/500, Iter: 5/119 -- train_loss: 1.1684 
2025-08-11 10:52:41,753 - INFO - Epoch: 219/500, Iter: 6/119 -- train_loss: 1.1809 
2025-08-11 10:52:42,633 - INFO - Epoch: 219/500, Iter: 7/119 -- train_loss: 1.1795 
2025-08-11 10:52:44,124 - INFO - Epoch: 219/500, Iter: 8/119 -- train_loss: 1.1601 
2025-08-11 10:52:47,614 - INFO - Epoch: 219/500, Iter: 9/119 -- train_loss: 1.1017 
2025-08-11 10:52:49,131 - INFO - Epoch: 219/500, Iter: 10/119 -- train_loss: 1.1873 
2025-08-11 10:52:53,370 - INFO - Epoch: 219/500, Iter: 11/119 -- train_loss: 1.1803 
2025-08-11 10:52:53,682 - INFO - Epoch: 219/500, Iter: 12/119 -- train_loss: 1.1771 
2025-08-11 10:52:53,993 - INFO - Epoch: 219/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 10:56:20,329 - INFO - Epoch: 220/500, Iter: 1/119 -- train_loss: 1.1749 


[1/119]   1%|           [00:00<?]

2025-08-11 10:56:21,727 - INFO - Epoch: 220/500, Iter: 2/119 -- train_loss: 1.1546 
2025-08-11 10:56:22,531 - INFO - Epoch: 220/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 10:56:23,432 - INFO - Epoch: 220/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 10:56:23,754 - INFO - Epoch: 220/500, Iter: 5/119 -- train_loss: 1.1426 
2025-08-11 10:56:24,066 - INFO - Epoch: 220/500, Iter: 6/119 -- train_loss: 1.1481 
2025-08-11 10:56:24,365 - INFO - Epoch: 220/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 10:56:24,663 - INFO - Epoch: 220/500, Iter: 8/119 -- train_loss: 1.1317 
2025-08-11 10:56:27,604 - INFO - Epoch: 220/500, Iter: 9/119 -- train_loss: 1.1630 
2025-08-11 10:56:27,920 - INFO - Epoch: 220/500, Iter: 10/119 -- train_loss: 1.1274 
2025-08-11 10:56:30,397 - INFO - Epoch: 220/500, Iter: 11/119 -- train_loss: 1.1579 
2025-08-11 10:56:30,699 - INFO - Epoch: 220/500, Iter: 12/119 -- train_loss: 1.1709 
2025-08-11 10:56:31,021 - INFO - Epoch: 220/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:00:09,270 - INFO - Epoch: 221/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 11:00:10,713 - INFO - Epoch: 221/500, Iter: 2/119 -- train_loss: 1.1651 
2025-08-11 11:00:11,024 - INFO - Epoch: 221/500, Iter: 3/119 -- train_loss: 1.1581 
2025-08-11 11:00:12,058 - INFO - Epoch: 221/500, Iter: 4/119 -- train_loss: 1.1657 
2025-08-11 11:00:12,384 - INFO - Epoch: 221/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 11:00:15,175 - INFO - Epoch: 221/500, Iter: 6/119 -- train_loss: 1.1376 
2025-08-11 11:00:15,445 - INFO - Epoch: 221/500, Iter: 7/119 -- train_loss: 1.1691 
2025-08-11 11:00:19,434 - INFO - Epoch: 221/500, Iter: 8/119 -- train_loss: 1.1621 
2025-08-11 11:00:19,731 - INFO - Epoch: 221/500, Iter: 9/119 -- train_loss: 1.1728 
2025-08-11 11:00:20,011 - INFO - Epoch: 221/500, Iter: 10/119 -- train_loss: 1.1413 
2025-08-11 11:00:20,290 - INFO - Epoch: 221/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 11:00:20,592 - INFO - Epoch: 221/500, Iter: 12/119 -- train_loss: 1.0764 
2025-08-11 11:00:20,902 - INFO - Epoch: 221/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:03:59,211 - INFO - Epoch: 222/500, Iter: 1/119 -- train_loss: 1.1402 


[1/119]   1%|           [00:00<?]

2025-08-11 11:04:02,983 - INFO - Epoch: 222/500, Iter: 2/119 -- train_loss: 1.1511 
2025-08-11 11:04:03,283 - INFO - Epoch: 222/500, Iter: 3/119 -- train_loss: 1.1442 
2025-08-11 11:04:03,616 - INFO - Epoch: 222/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 11:04:06,716 - INFO - Epoch: 222/500, Iter: 5/119 -- train_loss: 1.1745 
2025-08-11 11:04:06,999 - INFO - Epoch: 222/500, Iter: 6/119 -- train_loss: 1.1697 
2025-08-11 11:04:07,300 - INFO - Epoch: 222/500, Iter: 7/119 -- train_loss: 1.1534 
2025-08-11 11:04:07,601 - INFO - Epoch: 222/500, Iter: 8/119 -- train_loss: 1.1700 
2025-08-11 11:04:07,883 - INFO - Epoch: 222/500, Iter: 9/119 -- train_loss: 1.1732 
2025-08-11 11:04:10,726 - INFO - Epoch: 222/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 11:04:11,031 - INFO - Epoch: 222/500, Iter: 11/119 -- train_loss: 1.1047 
2025-08-11 11:04:15,975 - INFO - Epoch: 222/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 11:04:19,749 - INFO - Epoch: 222/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:07:50,784 - INFO - Epoch: 223/500, Iter: 1/119 -- train_loss: 1.1761 


[1/119]   1%|           [00:00<?]

2025-08-11 11:07:52,118 - INFO - Epoch: 223/500, Iter: 2/119 -- train_loss: 1.1803 
2025-08-11 11:07:52,451 - INFO - Epoch: 223/500, Iter: 3/119 -- train_loss: 1.1098 
2025-08-11 11:07:55,547 - INFO - Epoch: 223/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-11 11:07:55,889 - INFO - Epoch: 223/500, Iter: 5/119 -- train_loss: 1.1525 
2025-08-11 11:07:56,200 - INFO - Epoch: 223/500, Iter: 6/119 -- train_loss: 1.1629 
2025-08-11 11:07:56,508 - INFO - Epoch: 223/500, Iter: 7/119 -- train_loss: 0.9277 
2025-08-11 11:07:56,874 - INFO - Epoch: 223/500, Iter: 8/119 -- train_loss: 1.1754 
2025-08-11 11:07:57,566 - INFO - Epoch: 223/500, Iter: 9/119 -- train_loss: 1.1696 
2025-08-11 11:07:59,455 - INFO - Epoch: 223/500, Iter: 10/119 -- train_loss: 1.1624 
2025-08-11 11:07:59,780 - INFO - Epoch: 223/500, Iter: 11/119 -- train_loss: 1.1494 
2025-08-11 11:08:06,172 - INFO - Epoch: 223/500, Iter: 12/119 -- train_loss: 1.1716 
2025-08-11 11:08:06,489 - INFO - Epoch: 223/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:11:32,715 - INFO - Epoch: 224/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 11:11:33,451 - INFO - Epoch: 224/500, Iter: 2/119 -- train_loss: 1.1703 
2025-08-11 11:11:33,762 - INFO - Epoch: 224/500, Iter: 3/119 -- train_loss: 1.1715 
2025-08-11 11:11:34,062 - INFO - Epoch: 224/500, Iter: 4/119 -- train_loss: 1.1624 
2025-08-11 11:11:35,023 - INFO - Epoch: 224/500, Iter: 5/119 -- train_loss: 1.1199 
2025-08-11 11:11:36,655 - INFO - Epoch: 224/500, Iter: 6/119 -- train_loss: 1.0755 
2025-08-11 11:11:36,979 - INFO - Epoch: 224/500, Iter: 7/119 -- train_loss: 1.1723 
2025-08-11 11:11:37,263 - INFO - Epoch: 224/500, Iter: 8/119 -- train_loss: 1.1731 
2025-08-11 11:11:41,812 - INFO - Epoch: 224/500, Iter: 9/119 -- train_loss: 0.8775 
2025-08-11 11:11:42,162 - INFO - Epoch: 224/500, Iter: 10/119 -- train_loss: 1.1439 
2025-08-11 11:11:44,205 - INFO - Epoch: 224/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 11:11:44,533 - INFO - Epoch: 224/500, Iter: 12/119 -- train_loss: 1.1055 
2025-08-11 11:11:44,861 - INFO - Epoch: 224/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:15:24,827 - INFO - Epoch: 225/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 11:15:25,111 - INFO - Epoch: 225/500, Iter: 2/119 -- train_loss: 1.1079 
2025-08-11 11:15:25,397 - INFO - Epoch: 225/500, Iter: 3/119 -- train_loss: 1.1768 
2025-08-11 11:15:25,682 - INFO - Epoch: 225/500, Iter: 4/119 -- train_loss: 1.1398 
2025-08-11 11:15:26,020 - INFO - Epoch: 225/500, Iter: 5/119 -- train_loss: 1.1747 
2025-08-11 11:15:26,349 - INFO - Epoch: 225/500, Iter: 6/119 -- train_loss: 1.1723 
2025-08-11 11:15:26,680 - INFO - Epoch: 225/500, Iter: 7/119 -- train_loss: 1.0695 
2025-08-11 11:15:26,983 - INFO - Epoch: 225/500, Iter: 8/119 -- train_loss: 1.1050 
2025-08-11 11:15:30,714 - INFO - Epoch: 225/500, Iter: 9/119 -- train_loss: 1.1649 
2025-08-11 11:15:34,555 - INFO - Epoch: 225/500, Iter: 10/119 -- train_loss: 1.1739 
2025-08-11 11:15:34,825 - INFO - Epoch: 225/500, Iter: 11/119 -- train_loss: 1.1701 
2025-08-11 11:15:35,128 - INFO - Epoch: 225/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 11:15:35,439 - INFO - Epoch: 225/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:19:09,376 - INFO - Epoch: 226/500, Iter: 1/119 -- train_loss: 1.1773 


[1/119]   1%|           [00:00<?]

2025-08-11 11:19:10,186 - INFO - Epoch: 226/500, Iter: 2/119 -- train_loss: 0.9515 
2025-08-11 11:19:10,485 - INFO - Epoch: 226/500, Iter: 3/119 -- train_loss: 1.1581 
2025-08-11 11:19:11,929 - INFO - Epoch: 226/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 11:19:12,251 - INFO - Epoch: 226/500, Iter: 5/119 -- train_loss: 1.1220 
2025-08-11 11:19:17,341 - INFO - Epoch: 226/500, Iter: 6/119 -- train_loss: 1.1673 
2025-08-11 11:19:17,625 - INFO - Epoch: 226/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 11:19:17,921 - INFO - Epoch: 226/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 11:19:20,451 - INFO - Epoch: 226/500, Iter: 9/119 -- train_loss: 1.1730 
2025-08-11 11:19:24,318 - INFO - Epoch: 226/500, Iter: 10/119 -- train_loss: 1.1340 
2025-08-11 11:19:24,632 - INFO - Epoch: 226/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 11:19:24,908 - INFO - Epoch: 226/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 11:19:25,208 - INFO - Epoch: 226/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:23:04,258 - INFO - Epoch: 227/500, Iter: 1/119 -- train_loss: 1.1382 


[1/119]   1%|           [00:00<?]

2025-08-11 11:23:04,552 - INFO - Epoch: 227/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 11:23:04,868 - INFO - Epoch: 227/500, Iter: 3/119 -- train_loss: 1.1745 
2025-08-11 11:23:05,189 - INFO - Epoch: 227/500, Iter: 4/119 -- train_loss: 1.1725 
2025-08-11 11:23:09,148 - INFO - Epoch: 227/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 11:23:09,495 - INFO - Epoch: 227/500, Iter: 6/119 -- train_loss: 1.1666 
2025-08-11 11:23:09,791 - INFO - Epoch: 227/500, Iter: 7/119 -- train_loss: 1.1709 
2025-08-11 11:23:10,103 - INFO - Epoch: 227/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 11:23:12,581 - INFO - Epoch: 227/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 11:23:17,102 - INFO - Epoch: 227/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 11:23:17,367 - INFO - Epoch: 227/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 11:23:17,663 - INFO - Epoch: 227/500, Iter: 12/119 -- train_loss: 1.1744 
2025-08-11 11:23:17,965 - INFO - Epoch: 227/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:26:48,979 - INFO - Epoch: 228/500, Iter: 1/119 -- train_loss: 1.1503 


[1/119]   1%|           [00:00<?]

2025-08-11 11:26:51,522 - INFO - Epoch: 228/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 11:26:51,850 - INFO - Epoch: 228/500, Iter: 3/119 -- train_loss: 1.1603 
2025-08-11 11:26:52,172 - INFO - Epoch: 228/500, Iter: 4/119 -- train_loss: 1.1678 
2025-08-11 11:26:52,451 - INFO - Epoch: 228/500, Iter: 5/119 -- train_loss: 1.1699 
2025-08-11 11:26:52,772 - INFO - Epoch: 228/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 11:26:53,081 - INFO - Epoch: 228/500, Iter: 7/119 -- train_loss: 1.1367 
2025-08-11 11:26:53,422 - INFO - Epoch: 228/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 11:26:58,371 - INFO - Epoch: 228/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 11:26:58,650 - INFO - Epoch: 228/500, Iter: 10/119 -- train_loss: 1.1749 
2025-08-11 11:26:58,982 - INFO - Epoch: 228/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 11:26:59,303 - INFO - Epoch: 228/500, Iter: 12/119 -- train_loss: 1.1755 
2025-08-11 11:26:59,615 - INFO - Epoch: 228/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:30:44,504 - INFO - Epoch: 229/500, Iter: 1/119 -- train_loss: 1.1734 


[1/119]   1%|           [00:00<?]

2025-08-11 11:30:44,812 - INFO - Epoch: 229/500, Iter: 2/119 -- train_loss: 1.1810 
2025-08-11 11:30:45,078 - INFO - Epoch: 229/500, Iter: 3/119 -- train_loss: 1.1745 
2025-08-11 11:30:45,364 - INFO - Epoch: 229/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 11:30:45,645 - INFO - Epoch: 229/500, Iter: 5/119 -- train_loss: 1.1766 
2025-08-11 11:30:45,955 - INFO - Epoch: 229/500, Iter: 6/119 -- train_loss: 1.1708 
2025-08-11 11:30:46,262 - INFO - Epoch: 229/500, Iter: 7/119 -- train_loss: 1.1342 
2025-08-11 11:30:46,588 - INFO - Epoch: 229/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 11:30:54,316 - INFO - Epoch: 229/500, Iter: 9/119 -- train_loss: 1.1686 
2025-08-11 11:30:54,594 - INFO - Epoch: 229/500, Iter: 10/119 -- train_loss: 1.1745 
2025-08-11 11:30:54,861 - INFO - Epoch: 229/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 11:30:55,156 - INFO - Epoch: 229/500, Iter: 12/119 -- train_loss: 1.1762 
2025-08-11 11:30:55,471 - INFO - Epoch: 229/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:34:31,685 - INFO - Epoch: 230/500, Iter: 1/119 -- train_loss: 1.1366 


[1/119]   1%|           [00:00<?]

2025-08-11 11:34:32,090 - INFO - Epoch: 230/500, Iter: 2/119 -- train_loss: 1.1682 
2025-08-11 11:34:32,400 - INFO - Epoch: 230/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 11:34:34,770 - INFO - Epoch: 230/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 11:34:35,095 - INFO - Epoch: 230/500, Iter: 5/119 -- train_loss: 1.1724 
2025-08-11 11:34:35,408 - INFO - Epoch: 230/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 11:34:35,752 - INFO - Epoch: 230/500, Iter: 7/119 -- train_loss: 1.1515 
2025-08-11 11:34:36,077 - INFO - Epoch: 230/500, Iter: 8/119 -- train_loss: 1.1757 
2025-08-11 11:34:38,994 - INFO - Epoch: 230/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 11:34:40,615 - INFO - Epoch: 230/500, Iter: 10/119 -- train_loss: 1.1527 
2025-08-11 11:34:41,622 - INFO - Epoch: 230/500, Iter: 11/119 -- train_loss: 1.1569 
2025-08-11 11:34:44,869 - INFO - Epoch: 230/500, Iter: 12/119 -- train_loss: 0.9619 
2025-08-11 11:34:52,267 - INFO - Epoch: 230/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:38:26,592 - INFO - Epoch: 231/500, Iter: 1/119 -- train_loss: 0.9911 


[1/119]   1%|           [00:00<?]

2025-08-11 11:38:31,400 - INFO - Epoch: 231/500, Iter: 2/119 -- train_loss: 1.1105 
2025-08-11 11:38:31,713 - INFO - Epoch: 231/500, Iter: 3/119 -- train_loss: 1.1729 
2025-08-11 11:38:32,014 - INFO - Epoch: 231/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 11:38:32,335 - INFO - Epoch: 231/500, Iter: 5/119 -- train_loss: 1.1693 
2025-08-11 11:38:32,679 - INFO - Epoch: 231/500, Iter: 6/119 -- train_loss: 1.0754 
2025-08-11 11:38:32,975 - INFO - Epoch: 231/500, Iter: 7/119 -- train_loss: 1.0588 
2025-08-11 11:38:33,303 - INFO - Epoch: 231/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 11:38:39,958 - INFO - Epoch: 231/500, Iter: 9/119 -- train_loss: 1.0820 
2025-08-11 11:38:40,251 - INFO - Epoch: 231/500, Iter: 10/119 -- train_loss: 1.1166 
2025-08-11 11:38:40,589 - INFO - Epoch: 231/500, Iter: 11/119 -- train_loss: 1.1645 
2025-08-11 11:38:40,862 - INFO - Epoch: 231/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 11:38:41,168 - INFO - Epoch: 231/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:42:25,584 - INFO - Epoch: 232/500, Iter: 1/119 -- train_loss: 1.1703 


[1/119]   1%|           [00:00<?]

2025-08-11 11:42:25,923 - INFO - Epoch: 232/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 11:42:26,219 - INFO - Epoch: 232/500, Iter: 3/119 -- train_loss: 1.1034 
2025-08-11 11:42:26,530 - INFO - Epoch: 232/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 11:42:26,838 - INFO - Epoch: 232/500, Iter: 5/119 -- train_loss: 1.1416 
2025-08-11 11:42:27,125 - INFO - Epoch: 232/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 11:42:27,410 - INFO - Epoch: 232/500, Iter: 7/119 -- train_loss: 1.1641 
2025-08-11 11:42:27,712 - INFO - Epoch: 232/500, Iter: 8/119 -- train_loss: 1.1373 
2025-08-11 11:42:34,486 - INFO - Epoch: 232/500, Iter: 9/119 -- train_loss: 1.1503 
2025-08-11 11:42:34,810 - INFO - Epoch: 232/500, Iter: 10/119 -- train_loss: 1.1466 
2025-08-11 11:42:35,081 - INFO - Epoch: 232/500, Iter: 11/119 -- train_loss: 1.0914 
2025-08-11 11:42:37,290 - INFO - Epoch: 232/500, Iter: 12/119 -- train_loss: 1.1531 
2025-08-11 11:42:37,588 - INFO - Epoch: 232/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:46:22,254 - INFO - Epoch: 233/500, Iter: 1/119 -- train_loss: 0.9969 


[1/119]   1%|           [00:00<?]

2025-08-11 11:46:22,583 - INFO - Epoch: 233/500, Iter: 2/119 -- train_loss: 1.1728 
2025-08-11 11:46:22,905 - INFO - Epoch: 233/500, Iter: 3/119 -- train_loss: 1.1466 
2025-08-11 11:46:24,502 - INFO - Epoch: 233/500, Iter: 4/119 -- train_loss: 1.1357 
2025-08-11 11:46:27,355 - INFO - Epoch: 233/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 11:46:27,664 - INFO - Epoch: 233/500, Iter: 6/119 -- train_loss: 1.1113 
2025-08-11 11:46:27,947 - INFO - Epoch: 233/500, Iter: 7/119 -- train_loss: 1.1623 
2025-08-11 11:46:28,228 - INFO - Epoch: 233/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 11:46:28,566 - INFO - Epoch: 233/500, Iter: 9/119 -- train_loss: 1.0905 
2025-08-11 11:46:29,846 - INFO - Epoch: 233/500, Iter: 10/119 -- train_loss: 1.1774 
2025-08-11 11:46:30,167 - INFO - Epoch: 233/500, Iter: 11/119 -- train_loss: 1.2092 
2025-08-11 11:46:30,472 - INFO - Epoch: 233/500, Iter: 12/119 -- train_loss: 1.1761 
2025-08-11 11:46:34,378 - INFO - Epoch: 233/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:50:16,003 - INFO - Epoch: 234/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 11:50:19,473 - INFO - Epoch: 234/500, Iter: 2/119 -- train_loss: 1.1382 
2025-08-11 11:50:19,795 - INFO - Epoch: 234/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 11:50:20,088 - INFO - Epoch: 234/500, Iter: 4/119 -- train_loss: 1.1786 
2025-08-11 11:50:20,395 - INFO - Epoch: 234/500, Iter: 5/119 -- train_loss: 1.1366 
2025-08-11 11:50:25,145 - INFO - Epoch: 234/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 11:50:25,465 - INFO - Epoch: 234/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 11:50:25,758 - INFO - Epoch: 234/500, Iter: 8/119 -- train_loss: 1.1522 
2025-08-11 11:50:26,083 - INFO - Epoch: 234/500, Iter: 9/119 -- train_loss: 1.1497 
2025-08-11 11:50:26,389 - INFO - Epoch: 234/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 11:50:26,685 - INFO - Epoch: 234/500, Iter: 11/119 -- train_loss: 1.0803 
2025-08-11 11:50:26,983 - INFO - Epoch: 234/500, Iter: 12/119 -- train_loss: 1.1515 
2025-08-11 11:50:27,300 - INFO - Epoch: 234/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:54:19,871 - INFO - Epoch: 235/500, Iter: 1/119 -- train_loss: 1.1731 


[1/119]   1%|           [00:00<?]

2025-08-11 11:54:21,110 - INFO - Epoch: 235/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 11:54:21,441 - INFO - Epoch: 235/500, Iter: 3/119 -- train_loss: 1.0328 
2025-08-11 11:54:21,765 - INFO - Epoch: 235/500, Iter: 4/119 -- train_loss: 1.1543 
2025-08-11 11:54:22,080 - INFO - Epoch: 235/500, Iter: 5/119 -- train_loss: 1.1827 
2025-08-11 11:54:22,416 - INFO - Epoch: 235/500, Iter: 6/119 -- train_loss: 1.1415 
2025-08-11 11:54:22,693 - INFO - Epoch: 235/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 11:54:22,976 - INFO - Epoch: 235/500, Iter: 8/119 -- train_loss: 1.1708 
2025-08-11 11:54:26,433 - INFO - Epoch: 235/500, Iter: 9/119 -- train_loss: 1.1109 
2025-08-11 11:54:31,387 - INFO - Epoch: 235/500, Iter: 10/119 -- train_loss: 1.1447 
2025-08-11 11:54:36,853 - INFO - Epoch: 235/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 11:54:37,178 - INFO - Epoch: 235/500, Iter: 12/119 -- train_loss: 1.1445 
2025-08-11 11:54:37,462 - INFO - Epoch: 235/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 11:58:16,480 - INFO - Epoch: 236/500, Iter: 1/119 -- train_loss: 1.0874 


[1/119]   1%|           [00:00<?]

2025-08-11 11:58:17,836 - INFO - Epoch: 236/500, Iter: 2/119 -- train_loss: 1.1748 
2025-08-11 11:58:18,146 - INFO - Epoch: 236/500, Iter: 3/119 -- train_loss: 1.1692 
2025-08-11 11:58:20,684 - INFO - Epoch: 236/500, Iter: 4/119 -- train_loss: 1.1747 
2025-08-11 11:58:20,970 - INFO - Epoch: 236/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-11 11:58:21,272 - INFO - Epoch: 236/500, Iter: 6/119 -- train_loss: 1.1693 
2025-08-11 11:58:21,591 - INFO - Epoch: 236/500, Iter: 7/119 -- train_loss: 1.1746 
2025-08-11 11:58:21,916 - INFO - Epoch: 236/500, Iter: 8/119 -- train_loss: 1.1752 
2025-08-11 11:58:23,493 - INFO - Epoch: 236/500, Iter: 9/119 -- train_loss: 1.1687 
2025-08-11 11:58:23,819 - INFO - Epoch: 236/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 11:58:30,840 - INFO - Epoch: 236/500, Iter: 11/119 -- train_loss: 1.1551 
2025-08-11 11:58:32,924 - INFO - Epoch: 236/500, Iter: 12/119 -- train_loss: 1.1525 
2025-08-11 11:58:33,244 - INFO - Epoch: 236/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:02:18,766 - INFO - Epoch: 237/500, Iter: 1/119 -- train_loss: 1.1799 


[1/119]   1%|           [00:00<?]

2025-08-11 12:02:19,100 - INFO - Epoch: 237/500, Iter: 2/119 -- train_loss: 1.1754 
2025-08-11 12:02:19,419 - INFO - Epoch: 237/500, Iter: 3/119 -- train_loss: 1.1719 
2025-08-11 12:02:19,758 - INFO - Epoch: 237/500, Iter: 4/119 -- train_loss: 1.1693 
2025-08-11 12:02:20,068 - INFO - Epoch: 237/500, Iter: 5/119 -- train_loss: 1.1746 
2025-08-11 12:02:20,386 - INFO - Epoch: 237/500, Iter: 6/119 -- train_loss: 1.1764 
2025-08-11 12:02:20,681 - INFO - Epoch: 237/500, Iter: 7/119 -- train_loss: 1.1536 
2025-08-11 12:02:21,010 - INFO - Epoch: 237/500, Iter: 8/119 -- train_loss: 1.1647 
2025-08-11 12:02:27,165 - INFO - Epoch: 237/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 12:02:27,481 - INFO - Epoch: 237/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 12:02:27,807 - INFO - Epoch: 237/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 12:02:32,357 - INFO - Epoch: 237/500, Iter: 12/119 -- train_loss: 1.1616 
2025-08-11 12:02:32,648 - INFO - Epoch: 237/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:06:00,077 - INFO - Epoch: 238/500, Iter: 1/119 -- train_loss: 1.1655 


[1/119]   1%|           [00:00<?]

2025-08-11 12:06:00,355 - INFO - Epoch: 238/500, Iter: 2/119 -- train_loss: 1.1594 
2025-08-11 12:06:00,660 - INFO - Epoch: 238/500, Iter: 3/119 -- train_loss: 1.1433 
2025-08-11 12:06:06,461 - INFO - Epoch: 238/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 12:06:06,715 - INFO - Epoch: 238/500, Iter: 5/119 -- train_loss: 1.1696 
2025-08-11 12:06:07,021 - INFO - Epoch: 238/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 12:06:07,274 - INFO - Epoch: 238/500, Iter: 7/119 -- train_loss: 1.1733 
2025-08-11 12:06:07,572 - INFO - Epoch: 238/500, Iter: 8/119 -- train_loss: 1.1686 
2025-08-11 12:06:12,491 - INFO - Epoch: 238/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 12:06:12,814 - INFO - Epoch: 238/500, Iter: 10/119 -- train_loss: 1.1437 
2025-08-11 12:06:13,121 - INFO - Epoch: 238/500, Iter: 11/119 -- train_loss: 1.1449 
2025-08-11 12:06:13,438 - INFO - Epoch: 238/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 12:06:13,714 - INFO - Epoch: 238/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:09:47,413 - INFO - Epoch: 239/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 12:09:47,724 - INFO - Epoch: 239/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 12:09:49,578 - INFO - Epoch: 239/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 12:09:49,912 - INFO - Epoch: 239/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 12:09:50,216 - INFO - Epoch: 239/500, Iter: 5/119 -- train_loss: 1.1672 
2025-08-11 12:09:51,779 - INFO - Epoch: 239/500, Iter: 6/119 -- train_loss: 1.1715 
2025-08-11 12:09:52,075 - INFO - Epoch: 239/500, Iter: 7/119 -- train_loss: 1.1744 
2025-08-11 12:09:52,831 - INFO - Epoch: 239/500, Iter: 8/119 -- train_loss: 1.1670 
2025-08-11 12:09:53,145 - INFO - Epoch: 239/500, Iter: 9/119 -- train_loss: 1.1404 
2025-08-11 12:09:53,467 - INFO - Epoch: 239/500, Iter: 10/119 -- train_loss: 1.1613 
2025-08-11 12:09:53,985 - INFO - Epoch: 239/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 12:09:54,402 - INFO - Epoch: 239/500, Iter: 12/119 -- train_loss: 1.1753 
2025-08-11 12:09:54,724 - INFO - Epoch: 239/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:13:35,967 - INFO - Epoch: 240/500, Iter: 1/119 -- train_loss: 1.1481 


[1/119]   1%|           [00:00<?]

2025-08-11 12:13:36,325 - INFO - Epoch: 240/500, Iter: 2/119 -- train_loss: 1.1768 
2025-08-11 12:13:36,647 - INFO - Epoch: 240/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 12:13:36,955 - INFO - Epoch: 240/500, Iter: 4/119 -- train_loss: 1.1711 
2025-08-11 12:13:37,280 - INFO - Epoch: 240/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 12:13:37,563 - INFO - Epoch: 240/500, Iter: 6/119 -- train_loss: 1.1503 
2025-08-11 12:13:37,886 - INFO - Epoch: 240/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 12:13:38,203 - INFO - Epoch: 240/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 12:13:39,970 - INFO - Epoch: 240/500, Iter: 9/119 -- train_loss: 1.0991 
2025-08-11 12:13:45,002 - INFO - Epoch: 240/500, Iter: 10/119 -- train_loss: 1.1381 
2025-08-11 12:13:45,327 - INFO - Epoch: 240/500, Iter: 11/119 -- train_loss: 1.1674 
2025-08-11 12:13:45,668 - INFO - Epoch: 240/500, Iter: 12/119 -- train_loss: 0.9787 
2025-08-11 12:13:45,989 - INFO - Epoch: 240/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:17:19,700 - INFO - Epoch: 241/500, Iter: 1/119 -- train_loss: 1.1744 


[1/119]   1%|           [00:00<?]

2025-08-11 12:17:22,777 - INFO - Epoch: 241/500, Iter: 2/119 -- train_loss: 1.1659 
2025-08-11 12:17:23,066 - INFO - Epoch: 241/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 12:17:23,344 - INFO - Epoch: 241/500, Iter: 4/119 -- train_loss: 1.1488 
2025-08-11 12:17:23,934 - INFO - Epoch: 241/500, Iter: 5/119 -- train_loss: 1.1617 
2025-08-11 12:17:24,275 - INFO - Epoch: 241/500, Iter: 6/119 -- train_loss: 1.1680 
2025-08-11 12:17:24,620 - INFO - Epoch: 241/500, Iter: 7/119 -- train_loss: 1.1746 
2025-08-11 12:17:24,920 - INFO - Epoch: 241/500, Iter: 8/119 -- train_loss: 1.1634 
2025-08-11 12:17:30,077 - INFO - Epoch: 241/500, Iter: 9/119 -- train_loss: 1.1460 
2025-08-11 12:17:30,394 - INFO - Epoch: 241/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 12:17:30,689 - INFO - Epoch: 241/500, Iter: 11/119 -- train_loss: 1.1371 
2025-08-11 12:17:30,993 - INFO - Epoch: 241/500, Iter: 12/119 -- train_loss: 1.1726 
2025-08-11 12:17:36,253 - INFO - Epoch: 241/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:21:01,244 - INFO - Epoch: 242/500, Iter: 1/119 -- train_loss: 1.1260 


[1/119]   1%|           [00:00<?]

2025-08-11 12:21:02,652 - INFO - Epoch: 242/500, Iter: 2/119 -- train_loss: 1.1664 
2025-08-11 12:21:04,818 - INFO - Epoch: 242/500, Iter: 3/119 -- train_loss: 1.1902 
2025-08-11 12:21:05,147 - INFO - Epoch: 242/500, Iter: 4/119 -- train_loss: 1.1837 
2025-08-11 12:21:05,444 - INFO - Epoch: 242/500, Iter: 5/119 -- train_loss: 1.1564 
2025-08-11 12:21:05,785 - INFO - Epoch: 242/500, Iter: 6/119 -- train_loss: 1.0858 
2025-08-11 12:21:06,585 - INFO - Epoch: 242/500, Iter: 7/119 -- train_loss: 1.1612 
2025-08-11 12:21:06,861 - INFO - Epoch: 242/500, Iter: 8/119 -- train_loss: 1.0917 
2025-08-11 12:21:13,268 - INFO - Epoch: 242/500, Iter: 9/119 -- train_loss: 1.1337 
2025-08-11 12:21:13,561 - INFO - Epoch: 242/500, Iter: 10/119 -- train_loss: 1.1493 
2025-08-11 12:21:13,862 - INFO - Epoch: 242/500, Iter: 11/119 -- train_loss: 1.1504 
2025-08-11 12:21:14,138 - INFO - Epoch: 242/500, Iter: 12/119 -- train_loss: 1.1406 
2025-08-11 12:21:14,434 - INFO - Epoch: 242/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:24:55,142 - INFO - Epoch: 243/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 12:24:55,425 - INFO - Epoch: 243/500, Iter: 2/119 -- train_loss: 1.1452 
2025-08-11 12:24:59,275 - INFO - Epoch: 243/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 12:24:59,590 - INFO - Epoch: 243/500, Iter: 4/119 -- train_loss: 1.1582 
2025-08-11 12:24:59,908 - INFO - Epoch: 243/500, Iter: 5/119 -- train_loss: 0.9034 
2025-08-11 12:25:00,224 - INFO - Epoch: 243/500, Iter: 6/119 -- train_loss: 1.1562 
2025-08-11 12:25:00,508 - INFO - Epoch: 243/500, Iter: 7/119 -- train_loss: 1.1727 
2025-08-11 12:25:00,825 - INFO - Epoch: 243/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 12:25:05,558 - INFO - Epoch: 243/500, Iter: 9/119 -- train_loss: 1.1725 
2025-08-11 12:25:05,875 - INFO - Epoch: 243/500, Iter: 10/119 -- train_loss: 1.1448 
2025-08-11 12:25:06,158 - INFO - Epoch: 243/500, Iter: 11/119 -- train_loss: 1.1820 
2025-08-11 12:25:06,472 - INFO - Epoch: 243/500, Iter: 12/119 -- train_loss: 1.1696 
2025-08-11 12:25:06,775 - INFO - Epoch: 243/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:28:42,115 - INFO - Epoch: 244/500, Iter: 1/119 -- train_loss: 1.1721 


[1/119]   1%|           [00:00<?]

2025-08-11 12:28:42,449 - INFO - Epoch: 244/500, Iter: 2/119 -- train_loss: 1.1730 
2025-08-11 12:28:44,799 - INFO - Epoch: 244/500, Iter: 3/119 -- train_loss: 1.1730 
2025-08-11 12:28:45,132 - INFO - Epoch: 244/500, Iter: 4/119 -- train_loss: 1.1625 
2025-08-11 12:28:45,442 - INFO - Epoch: 244/500, Iter: 5/119 -- train_loss: 1.1746 
2025-08-11 12:28:45,742 - INFO - Epoch: 244/500, Iter: 6/119 -- train_loss: 1.1591 
2025-08-11 12:28:46,032 - INFO - Epoch: 244/500, Iter: 7/119 -- train_loss: 1.1423 
2025-08-11 12:28:46,382 - INFO - Epoch: 244/500, Iter: 8/119 -- train_loss: 1.1535 
2025-08-11 12:28:51,242 - INFO - Epoch: 244/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 12:28:52,429 - INFO - Epoch: 244/500, Iter: 10/119 -- train_loss: 1.1778 
2025-08-11 12:28:52,731 - INFO - Epoch: 244/500, Iter: 11/119 -- train_loss: 1.1224 
2025-08-11 12:28:53,030 - INFO - Epoch: 244/500, Iter: 12/119 -- train_loss: 1.1771 
2025-08-11 12:28:53,338 - INFO - Epoch: 244/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:32:25,211 - INFO - Epoch: 245/500, Iter: 1/119 -- train_loss: 1.1746 


[1/119]   1%|           [00:00<?]

2025-08-11 12:32:26,473 - INFO - Epoch: 245/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 12:32:26,790 - INFO - Epoch: 245/500, Iter: 3/119 -- train_loss: 1.1681 
2025-08-11 12:32:27,106 - INFO - Epoch: 245/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 12:32:27,379 - INFO - Epoch: 245/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-11 12:32:27,673 - INFO - Epoch: 245/500, Iter: 6/119 -- train_loss: 1.1262 
2025-08-11 12:32:27,999 - INFO - Epoch: 245/500, Iter: 7/119 -- train_loss: 1.1668 
2025-08-11 12:32:28,357 - INFO - Epoch: 245/500, Iter: 8/119 -- train_loss: 1.1502 
2025-08-11 12:32:29,894 - INFO - Epoch: 245/500, Iter: 9/119 -- train_loss: 1.1722 
2025-08-11 12:32:32,394 - INFO - Epoch: 245/500, Iter: 10/119 -- train_loss: 1.1241 
2025-08-11 12:32:32,728 - INFO - Epoch: 245/500, Iter: 11/119 -- train_loss: 1.1750 
2025-08-11 12:32:33,399 - INFO - Epoch: 245/500, Iter: 12/119 -- train_loss: 0.9408 
2025-08-11 12:32:33,684 - INFO - Epoch: 245/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:36:12,179 - INFO - Epoch: 246/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 12:36:12,648 - INFO - Epoch: 246/500, Iter: 2/119 -- train_loss: 1.1314 
2025-08-11 12:36:14,381 - INFO - Epoch: 246/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 12:36:14,731 - INFO - Epoch: 246/500, Iter: 4/119 -- train_loss: 1.1156 
2025-08-11 12:36:15,023 - INFO - Epoch: 246/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-11 12:36:15,357 - INFO - Epoch: 246/500, Iter: 6/119 -- train_loss: 1.1703 
2025-08-11 12:36:15,673 - INFO - Epoch: 246/500, Iter: 7/119 -- train_loss: 1.1750 
2025-08-11 12:36:15,964 - INFO - Epoch: 246/500, Iter: 8/119 -- train_loss: 1.1549 
2025-08-11 12:36:22,547 - INFO - Epoch: 246/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 12:36:22,812 - INFO - Epoch: 246/500, Iter: 10/119 -- train_loss: 1.1671 
2025-08-11 12:36:23,114 - INFO - Epoch: 246/500, Iter: 11/119 -- train_loss: 0.9731 
2025-08-11 12:36:23,423 - INFO - Epoch: 246/500, Iter: 12/119 -- train_loss: 1.1685 
2025-08-11 12:36:23,697 - INFO - Epoch: 246/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:39:58,459 - INFO - Epoch: 247/500, Iter: 1/119 -- train_loss: 1.1161 


[1/119]   1%|           [00:00<?]

2025-08-11 12:40:01,581 - INFO - Epoch: 247/500, Iter: 2/119 -- train_loss: 1.1451 
2025-08-11 12:40:01,891 - INFO - Epoch: 247/500, Iter: 3/119 -- train_loss: 1.1781 
2025-08-11 12:40:02,667 - INFO - Epoch: 247/500, Iter: 4/119 -- train_loss: 1.1402 
2025-08-11 12:40:02,971 - INFO - Epoch: 247/500, Iter: 5/119 -- train_loss: 1.1461 
2025-08-11 12:40:03,306 - INFO - Epoch: 247/500, Iter: 6/119 -- train_loss: 1.1777 
2025-08-11 12:40:03,594 - INFO - Epoch: 247/500, Iter: 7/119 -- train_loss: 1.1745 
2025-08-11 12:40:03,888 - INFO - Epoch: 247/500, Iter: 8/119 -- train_loss: 1.1601 
2025-08-11 12:40:07,414 - INFO - Epoch: 247/500, Iter: 9/119 -- train_loss: 1.0838 
2025-08-11 12:40:07,712 - INFO - Epoch: 247/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 12:40:08,014 - INFO - Epoch: 247/500, Iter: 11/119 -- train_loss: 1.1460 
2025-08-11 12:40:15,087 - INFO - Epoch: 247/500, Iter: 12/119 -- train_loss: 1.1416 
2025-08-11 12:40:15,354 - INFO - Epoch: 247/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:43:49,641 - INFO - Epoch: 248/500, Iter: 1/119 -- train_loss: 1.1496 


[1/119]   1%|           [00:00<?]

2025-08-11 12:43:55,024 - INFO - Epoch: 248/500, Iter: 2/119 -- train_loss: 1.1553 
2025-08-11 12:43:55,345 - INFO - Epoch: 248/500, Iter: 3/119 -- train_loss: 1.1533 
2025-08-11 12:43:55,628 - INFO - Epoch: 248/500, Iter: 4/119 -- train_loss: 1.1663 
2025-08-11 12:43:55,930 - INFO - Epoch: 248/500, Iter: 5/119 -- train_loss: 1.1271 
2025-08-11 12:43:56,221 - INFO - Epoch: 248/500, Iter: 6/119 -- train_loss: 1.1437 
2025-08-11 12:43:56,523 - INFO - Epoch: 248/500, Iter: 7/119 -- train_loss: 1.1593 
2025-08-11 12:43:56,828 - INFO - Epoch: 248/500, Iter: 8/119 -- train_loss: 1.1789 
2025-08-11 12:44:06,876 - INFO - Epoch: 248/500, Iter: 9/119 -- train_loss: 1.1679 
2025-08-11 12:44:07,144 - INFO - Epoch: 248/500, Iter: 10/119 -- train_loss: 1.1788 
2025-08-11 12:44:07,432 - INFO - Epoch: 248/500, Iter: 11/119 -- train_loss: 1.1709 
2025-08-11 12:44:07,721 - INFO - Epoch: 248/500, Iter: 12/119 -- train_loss: 1.1690 
2025-08-11 12:44:08,028 - INFO - Epoch: 248/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:47:33,625 - INFO - Epoch: 249/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 12:47:35,257 - INFO - Epoch: 249/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 12:47:39,352 - INFO - Epoch: 249/500, Iter: 3/119 -- train_loss: 1.1675 
2025-08-11 12:47:39,702 - INFO - Epoch: 249/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 12:47:39,979 - INFO - Epoch: 249/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 12:47:40,297 - INFO - Epoch: 249/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 12:47:42,669 - INFO - Epoch: 249/500, Iter: 7/119 -- train_loss: 1.1743 
2025-08-11 12:47:42,986 - INFO - Epoch: 249/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 12:47:43,261 - INFO - Epoch: 249/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 12:47:44,822 - INFO - Epoch: 249/500, Iter: 10/119 -- train_loss: 1.1195 
2025-08-11 12:47:49,145 - INFO - Epoch: 249/500, Iter: 11/119 -- train_loss: 1.1504 
2025-08-11 12:47:49,488 - INFO - Epoch: 249/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 12:47:49,791 - INFO - Epoch: 249/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:51:18,927 - INFO - Epoch: 250/500, Iter: 1/119 -- train_loss: 1.1743 


[1/119]   1%|           [00:00<?]

2025-08-11 12:51:19,213 - INFO - Epoch: 250/500, Iter: 2/119 -- train_loss: 1.1506 
2025-08-11 12:51:25,726 - INFO - Epoch: 250/500, Iter: 3/119 -- train_loss: 1.1597 
2025-08-11 12:51:26,048 - INFO - Epoch: 250/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 12:51:26,337 - INFO - Epoch: 250/500, Iter: 5/119 -- train_loss: 1.1323 
2025-08-11 12:51:26,622 - INFO - Epoch: 250/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 12:51:26,919 - INFO - Epoch: 250/500, Iter: 7/119 -- train_loss: 1.1207 
2025-08-11 12:51:27,210 - INFO - Epoch: 250/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 12:51:31,660 - INFO - Epoch: 250/500, Iter: 9/119 -- train_loss: 0.9087 
2025-08-11 12:51:31,966 - INFO - Epoch: 250/500, Iter: 10/119 -- train_loss: 1.1088 
2025-08-11 12:51:34,615 - INFO - Epoch: 250/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 12:51:34,909 - INFO - Epoch: 250/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 12:51:35,179 - INFO - Epoch: 250/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:55:14,617 - INFO - Epoch: 251/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 12:55:14,934 - INFO - Epoch: 251/500, Iter: 2/119 -- train_loss: 1.1550 
2025-08-11 12:55:15,234 - INFO - Epoch: 251/500, Iter: 3/119 -- train_loss: 1.1267 
2025-08-11 12:55:15,550 - INFO - Epoch: 251/500, Iter: 4/119 -- train_loss: 1.0796 
2025-08-11 12:55:15,867 - INFO - Epoch: 251/500, Iter: 5/119 -- train_loss: 1.1590 
2025-08-11 12:55:16,198 - INFO - Epoch: 251/500, Iter: 6/119 -- train_loss: 1.1693 
2025-08-11 12:55:16,534 - INFO - Epoch: 251/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 12:55:16,834 - INFO - Epoch: 251/500, Iter: 8/119 -- train_loss: 1.1760 
2025-08-11 12:55:22,216 - INFO - Epoch: 251/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 12:55:22,500 - INFO - Epoch: 251/500, Iter: 10/119 -- train_loss: 1.1700 
2025-08-11 12:55:22,773 - INFO - Epoch: 251/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 12:55:23,084 - INFO - Epoch: 251/500, Iter: 12/119 -- train_loss: 1.1716 
2025-08-11 12:55:23,400 - INFO - Epoch: 251/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 12:59:04,127 - INFO - Epoch: 252/500, Iter: 1/119 -- train_loss: 1.1750 


[1/119]   1%|           [00:00<?]

2025-08-11 12:59:04,406 - INFO - Epoch: 252/500, Iter: 2/119 -- train_loss: 1.1710 
2025-08-11 12:59:04,708 - INFO - Epoch: 252/500, Iter: 3/119 -- train_loss: 1.1750 
2025-08-11 12:59:05,024 - INFO - Epoch: 252/500, Iter: 4/119 -- train_loss: 1.1710 
2025-08-11 12:59:05,470 - INFO - Epoch: 252/500, Iter: 5/119 -- train_loss: 1.1695 
2025-08-11 12:59:05,788 - INFO - Epoch: 252/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 12:59:06,101 - INFO - Epoch: 252/500, Iter: 7/119 -- train_loss: 1.1386 
2025-08-11 12:59:06,385 - INFO - Epoch: 252/500, Iter: 8/119 -- train_loss: 1.1719 
2025-08-11 12:59:14,097 - INFO - Epoch: 252/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 12:59:14,358 - INFO - Epoch: 252/500, Iter: 10/119 -- train_loss: 1.0124 
2025-08-11 12:59:14,674 - INFO - Epoch: 252/500, Iter: 11/119 -- train_loss: 1.0868 
2025-08-11 12:59:14,991 - INFO - Epoch: 252/500, Iter: 12/119 -- train_loss: 1.1711 
2025-08-11 12:59:15,324 - INFO - Epoch: 252/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:02:48,332 - INFO - Epoch: 253/500, Iter: 1/119 -- train_loss: 1.1456 


[1/119]   1%|           [00:00<?]

2025-08-11 13:02:50,130 - INFO - Epoch: 253/500, Iter: 2/119 -- train_loss: 1.1500 
2025-08-11 13:02:50,437 - INFO - Epoch: 253/500, Iter: 3/119 -- train_loss: 1.1762 
2025-08-11 13:02:50,727 - INFO - Epoch: 253/500, Iter: 4/119 -- train_loss: 1.1584 
2025-08-11 13:02:51,029 - INFO - Epoch: 253/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 13:02:51,329 - INFO - Epoch: 253/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 13:02:52,729 - INFO - Epoch: 253/500, Iter: 7/119 -- train_loss: 1.1737 
2025-08-11 13:02:55,022 - INFO - Epoch: 253/500, Iter: 8/119 -- train_loss: 1.1696 
2025-08-11 13:02:58,455 - INFO - Epoch: 253/500, Iter: 9/119 -- train_loss: 1.1651 
2025-08-11 13:02:58,761 - INFO - Epoch: 253/500, Iter: 10/119 -- train_loss: 1.1249 
2025-08-11 13:02:59,102 - INFO - Epoch: 253/500, Iter: 11/119 -- train_loss: 1.1737 
2025-08-11 13:02:59,408 - INFO - Epoch: 253/500, Iter: 12/119 -- train_loss: 1.1751 
2025-08-11 13:02:59,680 - INFO - Epoch: 253/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:06:34,612 - INFO - Epoch: 254/500, Iter: 1/119 -- train_loss: 1.1746 


[1/119]   1%|           [00:00<?]

2025-08-11 13:06:34,939 - INFO - Epoch: 254/500, Iter: 2/119 -- train_loss: 1.1575 
2025-08-11 13:06:40,180 - INFO - Epoch: 254/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 13:06:40,460 - INFO - Epoch: 254/500, Iter: 4/119 -- train_loss: 1.1762 
2025-08-11 13:06:40,762 - INFO - Epoch: 254/500, Iter: 5/119 -- train_loss: 1.1252 
2025-08-11 13:06:41,077 - INFO - Epoch: 254/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 13:06:41,386 - INFO - Epoch: 254/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 13:06:41,676 - INFO - Epoch: 254/500, Iter: 8/119 -- train_loss: 1.1601 
2025-08-11 13:06:41,986 - INFO - Epoch: 254/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 13:06:42,291 - INFO - Epoch: 254/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 13:06:51,594 - INFO - Epoch: 254/500, Iter: 11/119 -- train_loss: 1.1746 
2025-08-11 13:06:51,857 - INFO - Epoch: 254/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 13:06:52,157 - INFO - Epoch: 254/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:10:12,769 - INFO - Epoch: 255/500, Iter: 1/119 -- train_loss: 1.1642 


[1/119]   1%|           [00:00<?]

2025-08-11 13:10:14,375 - INFO - Epoch: 255/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 13:10:14,703 - INFO - Epoch: 255/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 13:10:17,774 - INFO - Epoch: 255/500, Iter: 4/119 -- train_loss: 1.1682 
2025-08-11 13:10:18,064 - INFO - Epoch: 255/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 13:10:18,390 - INFO - Epoch: 255/500, Iter: 6/119 -- train_loss: 1.0930 
2025-08-11 13:10:18,673 - INFO - Epoch: 255/500, Iter: 7/119 -- train_loss: 1.1763 
2025-08-11 13:10:18,974 - INFO - Epoch: 255/500, Iter: 8/119 -- train_loss: 1.1142 
2025-08-11 13:10:19,285 - INFO - Epoch: 255/500, Iter: 9/119 -- train_loss: 1.1147 
2025-08-11 13:10:19,657 - INFO - Epoch: 255/500, Iter: 10/119 -- train_loss: 1.1551 
2025-08-11 13:10:22,064 - INFO - Epoch: 255/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 13:10:23,414 - INFO - Epoch: 255/500, Iter: 12/119 -- train_loss: 1.1511 
2025-08-11 13:10:23,683 - INFO - Epoch: 255/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:13:55,135 - INFO - Epoch: 256/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 13:14:03,666 - INFO - Epoch: 256/500, Iter: 2/119 -- train_loss: 1.1652 
2025-08-11 13:14:03,976 - INFO - Epoch: 256/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 13:14:04,244 - INFO - Epoch: 256/500, Iter: 4/119 -- train_loss: 1.1525 
2025-08-11 13:14:04,552 - INFO - Epoch: 256/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 13:14:04,847 - INFO - Epoch: 256/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 13:14:05,136 - INFO - Epoch: 256/500, Iter: 7/119 -- train_loss: 1.1343 
2025-08-11 13:14:05,426 - INFO - Epoch: 256/500, Iter: 8/119 -- train_loss: 1.1679 
2025-08-11 13:14:05,746 - INFO - Epoch: 256/500, Iter: 9/119 -- train_loss: 1.1558 
2025-08-11 13:14:14,258 - INFO - Epoch: 256/500, Iter: 10/119 -- train_loss: 1.1763 
2025-08-11 13:14:14,554 - INFO - Epoch: 256/500, Iter: 11/119 -- train_loss: 1.1712 
2025-08-11 13:14:14,847 - INFO - Epoch: 256/500, Iter: 12/119 -- train_loss: 1.1301 
2025-08-11 13:14:15,121 - INFO - Epoch: 256/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:17:54,241 - INFO - Epoch: 257/500, Iter: 1/119 -- train_loss: 0.9433 


[1/119]   1%|           [00:00<?]

2025-08-11 13:17:55,159 - INFO - Epoch: 257/500, Iter: 2/119 -- train_loss: 1.0339 
2025-08-11 13:17:55,462 - INFO - Epoch: 257/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 13:17:55,734 - INFO - Epoch: 257/500, Iter: 4/119 -- train_loss: 1.1465 
2025-08-11 13:17:56,050 - INFO - Epoch: 257/500, Iter: 5/119 -- train_loss: 1.1765 
2025-08-11 13:17:56,391 - INFO - Epoch: 257/500, Iter: 6/119 -- train_loss: 1.1541 
2025-08-11 13:17:56,714 - INFO - Epoch: 257/500, Iter: 7/119 -- train_loss: 1.1735 
2025-08-11 13:17:57,091 - INFO - Epoch: 257/500, Iter: 8/119 -- train_loss: 1.1749 
2025-08-11 13:18:02,207 - INFO - Epoch: 257/500, Iter: 9/119 -- train_loss: 1.1860 
2025-08-11 13:18:06,840 - INFO - Epoch: 257/500, Iter: 10/119 -- train_loss: 1.1589 
2025-08-11 13:18:07,136 - INFO - Epoch: 257/500, Iter: 11/119 -- train_loss: 1.1923 
2025-08-11 13:18:07,475 - INFO - Epoch: 257/500, Iter: 12/119 -- train_loss: 1.1577 
2025-08-11 13:18:07,787 - INFO - Epoch: 257/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:21:48,328 - INFO - Epoch: 258/500, Iter: 1/119 -- train_loss: 1.1722 


[1/119]   1%|           [00:00<?]

2025-08-11 13:21:59,095 - INFO - Epoch: 258/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 13:21:59,346 - INFO - Epoch: 258/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 13:21:59,616 - INFO - Epoch: 258/500, Iter: 4/119 -- train_loss: 1.1698 
2025-08-11 13:21:59,891 - INFO - Epoch: 258/500, Iter: 5/119 -- train_loss: 1.1502 
2025-08-11 13:22:00,170 - INFO - Epoch: 258/500, Iter: 6/119 -- train_loss: 1.1266 
2025-08-11 13:22:00,463 - INFO - Epoch: 258/500, Iter: 7/119 -- train_loss: 1.0569 
2025-08-11 13:22:00,763 - INFO - Epoch: 258/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 13:22:01,058 - INFO - Epoch: 258/500, Iter: 9/119 -- train_loss: 1.1662 
2025-08-11 13:22:08,104 - INFO - Epoch: 258/500, Iter: 10/119 -- train_loss: 1.0415 
2025-08-11 13:22:08,348 - INFO - Epoch: 258/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 13:22:08,623 - INFO - Epoch: 258/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 13:22:08,870 - INFO - Epoch: 258/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:25:37,019 - INFO - Epoch: 259/500, Iter: 1/119 -- train_loss: 1.1922 


[1/119]   1%|           [00:00<?]

2025-08-11 13:25:37,355 - INFO - Epoch: 259/500, Iter: 2/119 -- train_loss: 1.1775 
2025-08-11 13:25:37,646 - INFO - Epoch: 259/500, Iter: 3/119 -- train_loss: 1.1745 
2025-08-11 13:25:37,952 - INFO - Epoch: 259/500, Iter: 4/119 -- train_loss: 1.1699 
2025-08-11 13:25:38,237 - INFO - Epoch: 259/500, Iter: 5/119 -- train_loss: 1.1471 
2025-08-11 13:25:38,540 - INFO - Epoch: 259/500, Iter: 6/119 -- train_loss: 1.1791 
2025-08-11 13:25:38,856 - INFO - Epoch: 259/500, Iter: 7/119 -- train_loss: 1.1761 
2025-08-11 13:25:39,152 - INFO - Epoch: 259/500, Iter: 8/119 -- train_loss: 1.1751 
2025-08-11 13:25:44,641 - INFO - Epoch: 259/500, Iter: 9/119 -- train_loss: 1.1717 
2025-08-11 13:25:44,944 - INFO - Epoch: 259/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 13:25:45,261 - INFO - Epoch: 259/500, Iter: 11/119 -- train_loss: 1.1346 
2025-08-11 13:25:45,568 - INFO - Epoch: 259/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 13:25:45,868 - INFO - Epoch: 259/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:29:21,586 - INFO - Epoch: 260/500, Iter: 1/119 -- train_loss: 1.1615 


[1/119]   1%|           [00:00<?]

2025-08-11 13:29:21,900 - INFO - Epoch: 260/500, Iter: 2/119 -- train_loss: 1.1603 
2025-08-11 13:29:22,234 - INFO - Epoch: 260/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 13:29:22,550 - INFO - Epoch: 260/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 13:29:25,117 - INFO - Epoch: 260/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 13:29:25,399 - INFO - Epoch: 260/500, Iter: 6/119 -- train_loss: 1.0033 
2025-08-11 13:29:25,700 - INFO - Epoch: 260/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 13:29:25,992 - INFO - Epoch: 260/500, Iter: 8/119 -- train_loss: 1.1663 
2025-08-11 13:29:26,955 - INFO - Epoch: 260/500, Iter: 9/119 -- train_loss: 1.1606 
2025-08-11 13:29:27,259 - INFO - Epoch: 260/500, Iter: 10/119 -- train_loss: 1.1241 
2025-08-11 13:29:28,966 - INFO - Epoch: 260/500, Iter: 11/119 -- train_loss: 1.0801 
2025-08-11 13:29:29,260 - INFO - Epoch: 260/500, Iter: 12/119 -- train_loss: 1.1701 
2025-08-11 13:29:29,559 - INFO - Epoch: 260/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:33:06,698 - INFO - Epoch: 261/500, Iter: 1/119 -- train_loss: 1.1751 


[1/119]   1%|           [00:00<?]

2025-08-11 13:33:06,974 - INFO - Epoch: 261/500, Iter: 2/119 -- train_loss: 1.1673 
2025-08-11 13:33:08,565 - INFO - Epoch: 261/500, Iter: 3/119 -- train_loss: 1.1456 
2025-08-11 13:33:08,867 - INFO - Epoch: 261/500, Iter: 4/119 -- train_loss: 1.0855 
2025-08-11 13:33:10,520 - INFO - Epoch: 261/500, Iter: 5/119 -- train_loss: 1.1524 
2025-08-11 13:33:12,741 - INFO - Epoch: 261/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 13:33:13,058 - INFO - Epoch: 261/500, Iter: 7/119 -- train_loss: 1.0574 
2025-08-11 13:33:13,355 - INFO - Epoch: 261/500, Iter: 8/119 -- train_loss: 1.0941 
2025-08-11 13:33:13,685 - INFO - Epoch: 261/500, Iter: 9/119 -- train_loss: 1.1122 
2025-08-11 13:33:13,981 - INFO - Epoch: 261/500, Iter: 10/119 -- train_loss: 1.1540 
2025-08-11 13:33:14,304 - INFO - Epoch: 261/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 13:33:14,629 - INFO - Epoch: 261/500, Iter: 12/119 -- train_loss: 1.1654 
2025-08-11 13:33:16,440 - INFO - Epoch: 261/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:37:06,237 - INFO - Epoch: 262/500, Iter: 1/119 -- train_loss: 1.1719 


[1/119]   1%|           [00:00<?]

2025-08-11 13:37:06,538 - INFO - Epoch: 262/500, Iter: 2/119 -- train_loss: 1.1702 
2025-08-11 13:37:06,812 - INFO - Epoch: 262/500, Iter: 3/119 -- train_loss: 1.1646 
2025-08-11 13:37:07,096 - INFO - Epoch: 262/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 13:37:07,423 - INFO - Epoch: 262/500, Iter: 5/119 -- train_loss: 1.1663 
2025-08-11 13:37:07,834 - INFO - Epoch: 262/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 13:37:08,167 - INFO - Epoch: 262/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 13:37:08,450 - INFO - Epoch: 262/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 13:37:11,117 - INFO - Epoch: 262/500, Iter: 9/119 -- train_loss: 1.1673 
2025-08-11 13:37:11,396 - INFO - Epoch: 262/500, Iter: 10/119 -- train_loss: 1.1298 
2025-08-11 13:37:11,724 - INFO - Epoch: 262/500, Iter: 11/119 -- train_loss: 1.1754 
2025-08-11 13:37:12,052 - INFO - Epoch: 262/500, Iter: 12/119 -- train_loss: 1.1409 
2025-08-11 13:37:12,364 - INFO - Epoch: 262/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:40:44,805 - INFO - Epoch: 263/500, Iter: 1/119 -- train_loss: 1.1967 


[1/119]   1%|           [00:00<?]

2025-08-11 13:40:47,466 - INFO - Epoch: 263/500, Iter: 2/119 -- train_loss: 1.1625 
2025-08-11 13:40:47,793 - INFO - Epoch: 263/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 13:40:48,108 - INFO - Epoch: 263/500, Iter: 4/119 -- train_loss: 1.1645 
2025-08-11 13:40:48,389 - INFO - Epoch: 263/500, Iter: 5/119 -- train_loss: 1.1764 
2025-08-11 13:40:48,701 - INFO - Epoch: 263/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 13:40:49,016 - INFO - Epoch: 263/500, Iter: 7/119 -- train_loss: 1.1597 
2025-08-11 13:40:49,324 - INFO - Epoch: 263/500, Iter: 8/119 -- train_loss: 1.1605 
2025-08-11 13:40:54,133 - INFO - Epoch: 263/500, Iter: 9/119 -- train_loss: 1.1591 
2025-08-11 13:40:59,346 - INFO - Epoch: 263/500, Iter: 10/119 -- train_loss: 1.1392 
2025-08-11 13:40:59,610 - INFO - Epoch: 263/500, Iter: 11/119 -- train_loss: 1.1686 
2025-08-11 13:40:59,860 - INFO - Epoch: 263/500, Iter: 12/119 -- train_loss: 1.1305 
2025-08-11 13:41:00,168 - INFO - Epoch: 263/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:44:35,482 - INFO - Epoch: 264/500, Iter: 1/119 -- train_loss: 0.9536 


[1/119]   1%|           [00:00<?]

2025-08-11 13:44:38,332 - INFO - Epoch: 264/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 13:44:41,075 - INFO - Epoch: 264/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 13:44:41,348 - INFO - Epoch: 264/500, Iter: 4/119 -- train_loss: 1.1640 
2025-08-11 13:44:41,641 - INFO - Epoch: 264/500, Iter: 5/119 -- train_loss: 1.1569 
2025-08-11 13:44:41,932 - INFO - Epoch: 264/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 13:44:42,249 - INFO - Epoch: 264/500, Iter: 7/119 -- train_loss: 1.1693 
2025-08-11 13:44:42,538 - INFO - Epoch: 264/500, Iter: 8/119 -- train_loss: 1.1746 
2025-08-11 13:44:42,880 - INFO - Epoch: 264/500, Iter: 9/119 -- train_loss: 1.1602 
2025-08-11 13:44:43,766 - INFO - Epoch: 264/500, Iter: 10/119 -- train_loss: 1.1743 
2025-08-11 13:44:50,031 - INFO - Epoch: 264/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 13:44:50,335 - INFO - Epoch: 264/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 13:44:50,631 - INFO - Epoch: 264/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:48:24,415 - INFO - Epoch: 265/500, Iter: 1/119 -- train_loss: 1.1693 


[1/119]   1%|           [00:00<?]

2025-08-11 13:48:26,349 - INFO - Epoch: 265/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 13:48:26,632 - INFO - Epoch: 265/500, Iter: 3/119 -- train_loss: 1.0525 
2025-08-11 13:48:26,917 - INFO - Epoch: 265/500, Iter: 4/119 -- train_loss: 1.1470 
2025-08-11 13:48:27,238 - INFO - Epoch: 265/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 13:48:27,515 - INFO - Epoch: 265/500, Iter: 6/119 -- train_loss: 1.1535 
2025-08-11 13:48:28,942 - INFO - Epoch: 265/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 13:48:29,239 - INFO - Epoch: 265/500, Iter: 8/119 -- train_loss: 1.1409 
2025-08-11 13:48:30,342 - INFO - Epoch: 265/500, Iter: 9/119 -- train_loss: 1.1802 
2025-08-11 13:48:37,615 - INFO - Epoch: 265/500, Iter: 10/119 -- train_loss: 1.1874 
2025-08-11 13:48:37,908 - INFO - Epoch: 265/500, Iter: 11/119 -- train_loss: 1.1550 
2025-08-11 13:48:38,172 - INFO - Epoch: 265/500, Iter: 12/119 -- train_loss: 1.1745 
2025-08-11 13:48:38,478 - INFO - Epoch: 265/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:52:14,134 - INFO - Epoch: 266/500, Iter: 1/119 -- train_loss: 1.1705 


[1/119]   1%|           [00:00<?]

2025-08-11 13:52:15,134 - INFO - Epoch: 266/500, Iter: 2/119 -- train_loss: 1.1745 
2025-08-11 13:52:15,418 - INFO - Epoch: 266/500, Iter: 3/119 -- train_loss: 1.1691 
2025-08-11 13:52:15,733 - INFO - Epoch: 266/500, Iter: 4/119 -- train_loss: 1.1518 
2025-08-11 13:52:19,693 - INFO - Epoch: 266/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 13:52:20,017 - INFO - Epoch: 266/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 13:52:20,328 - INFO - Epoch: 266/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 13:52:20,657 - INFO - Epoch: 266/500, Iter: 8/119 -- train_loss: 1.1211 
2025-08-11 13:52:21,626 - INFO - Epoch: 266/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 13:52:21,950 - INFO - Epoch: 266/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 13:52:22,260 - INFO - Epoch: 266/500, Iter: 11/119 -- train_loss: 1.1538 
2025-08-11 13:52:27,300 - INFO - Epoch: 266/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 13:52:30,699 - INFO - Epoch: 266/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:56:01,660 - INFO - Epoch: 267/500, Iter: 1/119 -- train_loss: 1.1745 


[1/119]   1%|           [00:00<?]

2025-08-11 13:56:01,965 - INFO - Epoch: 267/500, Iter: 2/119 -- train_loss: 1.1570 
2025-08-11 13:56:02,522 - INFO - Epoch: 267/500, Iter: 3/119 -- train_loss: 1.1680 
2025-08-11 13:56:09,688 - INFO - Epoch: 267/500, Iter: 4/119 -- train_loss: 1.1753 
2025-08-11 13:56:09,990 - INFO - Epoch: 267/500, Iter: 5/119 -- train_loss: 1.1785 
2025-08-11 13:56:10,269 - INFO - Epoch: 267/500, Iter: 6/119 -- train_loss: 1.1500 
2025-08-11 13:56:10,578 - INFO - Epoch: 267/500, Iter: 7/119 -- train_loss: 1.1678 
2025-08-11 13:56:10,879 - INFO - Epoch: 267/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-11 13:56:11,156 - INFO - Epoch: 267/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 13:56:11,466 - INFO - Epoch: 267/500, Iter: 10/119 -- train_loss: 1.1323 
2025-08-11 13:56:15,824 - INFO - Epoch: 267/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 13:56:16,111 - INFO - Epoch: 267/500, Iter: 12/119 -- train_loss: 1.1786 
2025-08-11 13:56:16,468 - INFO - Epoch: 267/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 13:59:55,990 - INFO - Epoch: 268/500, Iter: 1/119 -- train_loss: 1.1437 


[1/119]   1%|           [00:00<?]

2025-08-11 14:00:03,775 - INFO - Epoch: 268/500, Iter: 2/119 -- train_loss: 1.1517 
2025-08-11 14:00:04,070 - INFO - Epoch: 268/500, Iter: 3/119 -- train_loss: 1.1718 
2025-08-11 14:00:04,344 - INFO - Epoch: 268/500, Iter: 4/119 -- train_loss: 1.1648 
2025-08-11 14:00:04,637 - INFO - Epoch: 268/500, Iter: 5/119 -- train_loss: 1.1129 
2025-08-11 14:00:04,930 - INFO - Epoch: 268/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 14:00:05,209 - INFO - Epoch: 268/500, Iter: 7/119 -- train_loss: 1.1745 
2025-08-11 14:00:05,527 - INFO - Epoch: 268/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 14:00:14,374 - INFO - Epoch: 268/500, Iter: 9/119 -- train_loss: 1.1706 
2025-08-11 14:00:14,603 - INFO - Epoch: 268/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 14:00:14,871 - INFO - Epoch: 268/500, Iter: 11/119 -- train_loss: 1.1264 
2025-08-11 14:00:15,137 - INFO - Epoch: 268/500, Iter: 12/119 -- train_loss: 1.1744 
2025-08-11 14:00:15,465 - INFO - Epoch: 268/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:04:00,556 - INFO - Epoch: 269/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 14:04:00,859 - INFO - Epoch: 269/500, Iter: 2/119 -- train_loss: 1.1756 
2025-08-11 14:04:01,155 - INFO - Epoch: 269/500, Iter: 3/119 -- train_loss: 1.1412 
2025-08-11 14:04:01,472 - INFO - Epoch: 269/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 14:04:01,739 - INFO - Epoch: 269/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 14:04:02,044 - INFO - Epoch: 269/500, Iter: 6/119 -- train_loss: 1.1696 
2025-08-11 14:04:02,328 - INFO - Epoch: 269/500, Iter: 7/119 -- train_loss: 1.1542 
2025-08-11 14:04:02,639 - INFO - Epoch: 269/500, Iter: 8/119 -- train_loss: 1.1303 
2025-08-11 14:04:10,705 - INFO - Epoch: 269/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 14:04:10,955 - INFO - Epoch: 269/500, Iter: 10/119 -- train_loss: 1.1372 
2025-08-11 14:04:11,205 - INFO - Epoch: 269/500, Iter: 11/119 -- train_loss: 1.1777 
2025-08-11 14:04:11,475 - INFO - Epoch: 269/500, Iter: 12/119 -- train_loss: 1.1675 
2025-08-11 14:04:11,765 - INFO - Epoch: 269/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:07:58,290 - INFO - Epoch: 270/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 14:07:58,608 - INFO - Epoch: 270/500, Iter: 2/119 -- train_loss: 1.1689 
2025-08-11 14:08:03,034 - INFO - Epoch: 270/500, Iter: 3/119 -- train_loss: 1.1744 
2025-08-11 14:08:03,334 - INFO - Epoch: 270/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 14:08:03,628 - INFO - Epoch: 270/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 14:08:03,907 - INFO - Epoch: 270/500, Iter: 6/119 -- train_loss: 1.1418 
2025-08-11 14:08:04,212 - INFO - Epoch: 270/500, Iter: 7/119 -- train_loss: 1.1746 
2025-08-11 14:08:04,500 - INFO - Epoch: 270/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 14:08:05,900 - INFO - Epoch: 270/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 14:08:06,200 - INFO - Epoch: 270/500, Iter: 10/119 -- train_loss: 1.1568 
2025-08-11 14:08:07,509 - INFO - Epoch: 270/500, Iter: 11/119 -- train_loss: 1.1628 
2025-08-11 14:08:09,080 - INFO - Epoch: 270/500, Iter: 12/119 -- train_loss: 1.1525 
2025-08-11 14:08:09,387 - INFO - Epoch: 270/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:11:51,177 - INFO - Epoch: 271/500, Iter: 1/119 -- train_loss: 1.1653 


[1/119]   1%|           [00:00<?]

2025-08-11 14:11:51,527 - INFO - Epoch: 271/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 14:11:51,828 - INFO - Epoch: 271/500, Iter: 3/119 -- train_loss: 0.9522 
2025-08-11 14:11:53,954 - INFO - Epoch: 271/500, Iter: 4/119 -- train_loss: 1.0946 
2025-08-11 14:11:54,249 - INFO - Epoch: 271/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 14:11:54,577 - INFO - Epoch: 271/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 14:11:54,877 - INFO - Epoch: 271/500, Iter: 7/119 -- train_loss: 1.1658 
2025-08-11 14:11:55,193 - INFO - Epoch: 271/500, Iter: 8/119 -- train_loss: 1.1736 
2025-08-11 14:11:57,470 - INFO - Epoch: 271/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 14:11:57,944 - INFO - Epoch: 271/500, Iter: 10/119 -- train_loss: 1.1745 
2025-08-11 14:11:58,251 - INFO - Epoch: 271/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 14:11:58,594 - INFO - Epoch: 271/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 14:11:58,870 - INFO - Epoch: 271/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:15:40,313 - INFO - Epoch: 272/500, Iter: 1/119 -- train_loss: 1.1579 


[1/119]   1%|           [00:00<?]

2025-08-11 14:15:42,991 - INFO - Epoch: 272/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 14:15:43,290 - INFO - Epoch: 272/500, Iter: 3/119 -- train_loss: 1.1744 
2025-08-11 14:15:44,581 - INFO - Epoch: 272/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 14:15:44,879 - INFO - Epoch: 272/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 14:15:45,194 - INFO - Epoch: 272/500, Iter: 6/119 -- train_loss: 1.1739 
2025-08-11 14:15:45,531 - INFO - Epoch: 272/500, Iter: 7/119 -- train_loss: 1.1680 
2025-08-11 14:15:45,847 - INFO - Epoch: 272/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 14:15:51,057 - INFO - Epoch: 272/500, Iter: 9/119 -- train_loss: 1.1791 
2025-08-11 14:15:54,073 - INFO - Epoch: 272/500, Iter: 10/119 -- train_loss: 1.1590 
2025-08-11 14:15:54,368 - INFO - Epoch: 272/500, Iter: 11/119 -- train_loss: 1.1647 
2025-08-11 14:15:54,656 - INFO - Epoch: 272/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 14:15:54,930 - INFO - Epoch: 272/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:19:21,846 - INFO - Epoch: 273/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 14:19:25,154 - INFO - Epoch: 273/500, Iter: 2/119 -- train_loss: 1.1707 
2025-08-11 14:19:25,464 - INFO - Epoch: 273/500, Iter: 3/119 -- train_loss: 1.1716 
2025-08-11 14:19:25,832 - INFO - Epoch: 273/500, Iter: 4/119 -- train_loss: 1.0888 
2025-08-11 14:19:26,152 - INFO - Epoch: 273/500, Iter: 5/119 -- train_loss: 1.1712 
2025-08-11 14:19:26,428 - INFO - Epoch: 273/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 14:19:26,747 - INFO - Epoch: 273/500, Iter: 7/119 -- train_loss: 1.1747 
2025-08-11 14:19:27,057 - INFO - Epoch: 273/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 14:19:27,373 - INFO - Epoch: 273/500, Iter: 9/119 -- train_loss: 1.1754 
2025-08-11 14:19:31,977 - INFO - Epoch: 273/500, Iter: 10/119 -- train_loss: 1.1407 
2025-08-11 14:19:32,274 - INFO - Epoch: 273/500, Iter: 11/119 -- train_loss: 1.1383 
2025-08-11 14:19:39,204 - INFO - Epoch: 273/500, Iter: 12/119 -- train_loss: 1.1717 
2025-08-11 14:19:39,467 - INFO - Epoch: 273/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:23:20,521 - INFO - Epoch: 274/500, Iter: 1/119 -- train_loss: 1.1727 


[1/119]   1%|           [00:00<?]

2025-08-11 14:23:22,709 - INFO - Epoch: 274/500, Iter: 2/119 -- train_loss: 1.1763 
2025-08-11 14:23:24,654 - INFO - Epoch: 274/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 14:23:24,971 - INFO - Epoch: 274/500, Iter: 4/119 -- train_loss: 1.0861 
2025-08-11 14:23:25,296 - INFO - Epoch: 274/500, Iter: 5/119 -- train_loss: 0.9325 
2025-08-11 14:23:25,581 - INFO - Epoch: 274/500, Iter: 6/119 -- train_loss: 1.1444 
2025-08-11 14:23:25,850 - INFO - Epoch: 274/500, Iter: 7/119 -- train_loss: 1.1691 
2025-08-11 14:23:29,447 - INFO - Epoch: 274/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 14:23:29,748 - INFO - Epoch: 274/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 14:23:32,451 - INFO - Epoch: 274/500, Iter: 10/119 -- train_loss: 1.1454 
2025-08-11 14:23:32,737 - INFO - Epoch: 274/500, Iter: 11/119 -- train_loss: 1.1545 
2025-08-11 14:23:35,671 - INFO - Epoch: 274/500, Iter: 12/119 -- train_loss: 1.0557 
2025-08-11 14:23:35,975 - INFO - Epoch: 274/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:27:12,279 - INFO - Epoch: 275/500, Iter: 1/119 -- train_loss: 1.1691 


[1/119]   1%|           [00:00<?]

2025-08-11 14:27:13,363 - INFO - Epoch: 275/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 14:27:16,678 - INFO - Epoch: 275/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 14:27:18,869 - INFO - Epoch: 275/500, Iter: 4/119 -- train_loss: 1.1668 
2025-08-11 14:27:19,200 - INFO - Epoch: 275/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 14:27:19,523 - INFO - Epoch: 275/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 14:27:19,812 - INFO - Epoch: 275/500, Iter: 7/119 -- train_loss: 1.0681 
2025-08-11 14:27:20,106 - INFO - Epoch: 275/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 14:27:20,414 - INFO - Epoch: 275/500, Iter: 9/119 -- train_loss: 1.1542 
2025-08-11 14:27:26,661 - INFO - Epoch: 275/500, Iter: 10/119 -- train_loss: 1.1731 
2025-08-11 14:27:26,962 - INFO - Epoch: 275/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 14:27:28,568 - INFO - Epoch: 275/500, Iter: 12/119 -- train_loss: 1.1703 
2025-08-11 14:27:28,851 - INFO - Epoch: 275/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:30:58,028 - INFO - Epoch: 276/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 14:30:59,862 - INFO - Epoch: 276/500, Iter: 2/119 -- train_loss: 1.1743 
2025-08-11 14:31:00,786 - INFO - Epoch: 276/500, Iter: 3/119 -- train_loss: 1.1485 
2025-08-11 14:31:01,063 - INFO - Epoch: 276/500, Iter: 4/119 -- train_loss: 1.1706 
2025-08-11 14:31:01,394 - INFO - Epoch: 276/500, Iter: 5/119 -- train_loss: 1.1400 
2025-08-11 14:31:01,707 - INFO - Epoch: 276/500, Iter: 6/119 -- train_loss: 1.0906 
2025-08-11 14:31:01,992 - INFO - Epoch: 276/500, Iter: 7/119 -- train_loss: 1.1761 
2025-08-11 14:31:02,303 - INFO - Epoch: 276/500, Iter: 8/119 -- train_loss: 1.1585 
2025-08-11 14:31:02,688 - INFO - Epoch: 276/500, Iter: 9/119 -- train_loss: 1.1750 
2025-08-11 14:31:04,241 - INFO - Epoch: 276/500, Iter: 10/119 -- train_loss: 1.1624 
2025-08-11 14:31:07,220 - INFO - Epoch: 276/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 14:31:07,513 - INFO - Epoch: 276/500, Iter: 12/119 -- train_loss: 1.1748 
2025-08-11 14:31:07,805 - INFO - Epoch: 276/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:34:46,131 - INFO - Epoch: 277/500, Iter: 1/119 -- train_loss: 1.1411 


[1/119]   1%|           [00:00<?]

2025-08-11 14:34:46,439 - INFO - Epoch: 277/500, Iter: 2/119 -- train_loss: 1.1748 
2025-08-11 14:34:46,718 - INFO - Epoch: 277/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 14:34:47,028 - INFO - Epoch: 277/500, Iter: 4/119 -- train_loss: 0.9828 
2025-08-11 14:34:47,315 - INFO - Epoch: 277/500, Iter: 5/119 -- train_loss: 1.1619 
2025-08-11 14:34:47,654 - INFO - Epoch: 277/500, Iter: 6/119 -- train_loss: 1.1717 
2025-08-11 14:34:51,651 - INFO - Epoch: 277/500, Iter: 7/119 -- train_loss: 1.1627 
2025-08-11 14:34:51,955 - INFO - Epoch: 277/500, Iter: 8/119 -- train_loss: 1.1732 
2025-08-11 14:34:55,051 - INFO - Epoch: 277/500, Iter: 9/119 -- train_loss: 1.1303 
2025-08-11 14:34:55,351 - INFO - Epoch: 277/500, Iter: 10/119 -- train_loss: 1.1634 
2025-08-11 14:34:55,640 - INFO - Epoch: 277/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 14:34:55,960 - INFO - Epoch: 277/500, Iter: 12/119 -- train_loss: 1.1665 
2025-08-11 14:34:56,254 - INFO - Epoch: 277/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:38:36,125 - INFO - Epoch: 278/500, Iter: 1/119 -- train_loss: 1.1384 


[1/119]   1%|           [00:00<?]

2025-08-11 14:38:38,826 - INFO - Epoch: 278/500, Iter: 2/119 -- train_loss: 1.1550 
2025-08-11 14:38:39,159 - INFO - Epoch: 278/500, Iter: 3/119 -- train_loss: 1.1768 
2025-08-11 14:38:39,462 - INFO - Epoch: 278/500, Iter: 4/119 -- train_loss: 1.1649 
2025-08-11 14:38:39,776 - INFO - Epoch: 278/500, Iter: 5/119 -- train_loss: 1.1629 
2025-08-11 14:38:40,072 - INFO - Epoch: 278/500, Iter: 6/119 -- train_loss: 1.1072 
2025-08-11 14:38:40,405 - INFO - Epoch: 278/500, Iter: 7/119 -- train_loss: 1.1600 
2025-08-11 14:38:40,693 - INFO - Epoch: 278/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 14:38:41,005 - INFO - Epoch: 278/500, Iter: 9/119 -- train_loss: 1.1635 
2025-08-11 14:38:50,622 - INFO - Epoch: 278/500, Iter: 10/119 -- train_loss: 1.1654 
2025-08-11 14:38:50,883 - INFO - Epoch: 278/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 14:38:51,148 - INFO - Epoch: 278/500, Iter: 12/119 -- train_loss: 1.1343 
2025-08-11 14:38:51,394 - INFO - Epoch: 278/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:42:18,044 - INFO - Epoch: 279/500, Iter: 1/119 -- train_loss: 1.1771 


[1/119]   1%|           [00:00<?]

2025-08-11 14:42:24,994 - INFO - Epoch: 279/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 14:42:25,300 - INFO - Epoch: 279/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 14:42:25,599 - INFO - Epoch: 279/500, Iter: 4/119 -- train_loss: 1.1476 
2025-08-11 14:42:27,394 - INFO - Epoch: 279/500, Iter: 5/119 -- train_loss: 1.1136 
2025-08-11 14:42:27,688 - INFO - Epoch: 279/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 14:42:27,995 - INFO - Epoch: 279/500, Iter: 7/119 -- train_loss: 1.1633 
2025-08-11 14:42:28,296 - INFO - Epoch: 279/500, Iter: 8/119 -- train_loss: 1.0752 
2025-08-11 14:42:28,610 - INFO - Epoch: 279/500, Iter: 9/119 -- train_loss: 1.1612 
2025-08-11 14:42:33,341 - INFO - Epoch: 279/500, Iter: 10/119 -- train_loss: 1.1715 
2025-08-11 14:42:33,580 - INFO - Epoch: 279/500, Iter: 11/119 -- train_loss: 1.1386 
2025-08-11 14:42:33,860 - INFO - Epoch: 279/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 14:42:34,172 - INFO - Epoch: 279/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:46:12,162 - INFO - Epoch: 280/500, Iter: 1/119 -- train_loss: 1.1535 


[1/119]   1%|           [00:00<?]

2025-08-11 14:46:13,338 - INFO - Epoch: 280/500, Iter: 2/119 -- train_loss: 1.1747 
2025-08-11 14:46:14,868 - INFO - Epoch: 280/500, Iter: 3/119 -- train_loss: 1.1743 
2025-08-11 14:46:15,167 - INFO - Epoch: 280/500, Iter: 4/119 -- train_loss: 1.1385 
2025-08-11 14:46:15,603 - INFO - Epoch: 280/500, Iter: 5/119 -- train_loss: 1.1733 
2025-08-11 14:46:15,921 - INFO - Epoch: 280/500, Iter: 6/119 -- train_loss: 1.1499 
2025-08-11 14:46:16,208 - INFO - Epoch: 280/500, Iter: 7/119 -- train_loss: 1.1669 
2025-08-11 14:46:16,539 - INFO - Epoch: 280/500, Iter: 8/119 -- train_loss: 1.1694 
2025-08-11 14:46:17,099 - INFO - Epoch: 280/500, Iter: 9/119 -- train_loss: 1.1591 
2025-08-11 14:46:19,591 - INFO - Epoch: 280/500, Iter: 10/119 -- train_loss: 1.1236 
2025-08-11 14:46:19,946 - INFO - Epoch: 280/500, Iter: 11/119 -- train_loss: 1.1681 
2025-08-11 14:46:20,234 - INFO - Epoch: 280/500, Iter: 12/119 -- train_loss: 1.1707 
2025-08-11 14:46:21,585 - INFO - Epoch: 280/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:49:59,270 - INFO - Epoch: 281/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 14:49:59,548 - INFO - Epoch: 281/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 14:49:59,868 - INFO - Epoch: 281/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 14:50:00,154 - INFO - Epoch: 281/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 14:50:00,480 - INFO - Epoch: 281/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 14:50:00,810 - INFO - Epoch: 281/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 14:50:01,104 - INFO - Epoch: 281/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 14:50:01,416 - INFO - Epoch: 281/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 14:50:04,152 - INFO - Epoch: 281/500, Iter: 9/119 -- train_loss: 1.1523 
2025-08-11 14:50:04,730 - INFO - Epoch: 281/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 14:50:05,030 - INFO - Epoch: 281/500, Iter: 11/119 -- train_loss: 1.1733 
2025-08-11 14:50:19,057 - INFO - Epoch: 281/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 14:50:19,303 - INFO - Epoch: 281/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:53:51,393 - INFO - Epoch: 282/500, Iter: 1/119 -- train_loss: 1.1736 


[1/119]   1%|           [00:00<?]

2025-08-11 14:53:51,687 - INFO - Epoch: 282/500, Iter: 2/119 -- train_loss: 1.1573 
2025-08-11 14:53:55,967 - INFO - Epoch: 282/500, Iter: 3/119 -- train_loss: 0.9137 
2025-08-11 14:53:56,293 - INFO - Epoch: 282/500, Iter: 4/119 -- train_loss: 1.1527 
2025-08-11 14:53:56,594 - INFO - Epoch: 282/500, Iter: 5/119 -- train_loss: 1.1303 
2025-08-11 14:53:56,905 - INFO - Epoch: 282/500, Iter: 6/119 -- train_loss: 1.1753 
2025-08-11 14:53:57,219 - INFO - Epoch: 282/500, Iter: 7/119 -- train_loss: 1.1755 
2025-08-11 14:53:57,493 - INFO - Epoch: 282/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-11 14:54:00,666 - INFO - Epoch: 282/500, Iter: 9/119 -- train_loss: 1.1521 
2025-08-11 14:54:00,961 - INFO - Epoch: 282/500, Iter: 10/119 -- train_loss: 1.1372 
2025-08-11 14:54:06,992 - INFO - Epoch: 282/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 14:54:07,325 - INFO - Epoch: 282/500, Iter: 12/119 -- train_loss: 1.1632 
2025-08-11 14:54:07,775 - INFO - Epoch: 282/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 14:57:35,569 - INFO - Epoch: 283/500, Iter: 1/119 -- train_loss: 1.1700 


[1/119]   1%|           [00:00<?]

2025-08-11 14:57:35,882 - INFO - Epoch: 283/500, Iter: 2/119 -- train_loss: 1.1655 
2025-08-11 14:57:36,668 - INFO - Epoch: 283/500, Iter: 3/119 -- train_loss: 1.1728 
2025-08-11 14:57:36,984 - INFO - Epoch: 283/500, Iter: 4/119 -- train_loss: 1.1751 
2025-08-11 14:57:37,925 - INFO - Epoch: 283/500, Iter: 5/119 -- train_loss: 1.1177 
2025-08-11 14:57:38,248 - INFO - Epoch: 283/500, Iter: 6/119 -- train_loss: 1.1723 
2025-08-11 14:57:38,542 - INFO - Epoch: 283/500, Iter: 7/119 -- train_loss: 1.1644 
2025-08-11 14:57:38,847 - INFO - Epoch: 283/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 14:57:41,011 - INFO - Epoch: 283/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 14:57:44,091 - INFO - Epoch: 283/500, Iter: 10/119 -- train_loss: 1.1414 
2025-08-11 14:57:47,307 - INFO - Epoch: 283/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 14:57:47,615 - INFO - Epoch: 283/500, Iter: 12/119 -- train_loss: 1.1313 
2025-08-11 14:57:47,915 - INFO - Epoch: 283/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:01:30,244 - INFO - Epoch: 284/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 15:01:30,552 - INFO - Epoch: 284/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 15:01:34,985 - INFO - Epoch: 284/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 15:01:35,271 - INFO - Epoch: 284/500, Iter: 4/119 -- train_loss: 1.1559 
2025-08-11 15:01:35,582 - INFO - Epoch: 284/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 15:01:35,888 - INFO - Epoch: 284/500, Iter: 6/119 -- train_loss: 1.1677 
2025-08-11 15:01:36,197 - INFO - Epoch: 284/500, Iter: 7/119 -- train_loss: 1.1384 
2025-08-11 15:01:36,496 - INFO - Epoch: 284/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 15:01:37,495 - INFO - Epoch: 284/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 15:01:37,773 - INFO - Epoch: 284/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 15:01:43,474 - INFO - Epoch: 284/500, Iter: 11/119 -- train_loss: 1.1311 
2025-08-11 15:01:47,214 - INFO - Epoch: 284/500, Iter: 12/119 -- train_loss: 1.1411 
2025-08-11 15:01:47,535 - INFO - Epoch: 284/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:05:15,010 - INFO - Epoch: 285/500, Iter: 1/119 -- train_loss: 1.1545 


[1/119]   1%|           [00:00<?]

2025-08-11 15:05:22,743 - INFO - Epoch: 285/500, Iter: 2/119 -- train_loss: 1.0145 
2025-08-11 15:05:23,038 - INFO - Epoch: 285/500, Iter: 3/119 -- train_loss: 1.0760 
2025-08-11 15:05:23,291 - INFO - Epoch: 285/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 15:05:23,551 - INFO - Epoch: 285/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 15:05:23,859 - INFO - Epoch: 285/500, Iter: 6/119 -- train_loss: 1.1562 
2025-08-11 15:05:24,159 - INFO - Epoch: 285/500, Iter: 7/119 -- train_loss: 1.1007 
2025-08-11 15:05:24,473 - INFO - Epoch: 285/500, Iter: 8/119 -- train_loss: 1.1700 
2025-08-11 15:05:24,742 - INFO - Epoch: 285/500, Iter: 9/119 -- train_loss: 1.1491 
2025-08-11 15:05:28,509 - INFO - Epoch: 285/500, Iter: 10/119 -- train_loss: 0.9522 
2025-08-11 15:05:28,790 - INFO - Epoch: 285/500, Iter: 11/119 -- train_loss: 1.1892 
2025-08-11 15:05:31,179 - INFO - Epoch: 285/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 15:05:31,459 - INFO - Epoch: 285/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:10:05,499 - INFO - Epoch: 286/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 15:10:13,526 - INFO - Epoch: 286/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 15:10:13,863 - INFO - Epoch: 286/500, Iter: 3/119 -- train_loss: 1.1794 
2025-08-11 15:10:16,399 - INFO - Epoch: 286/500, Iter: 4/119 -- train_loss: 1.0412 
2025-08-11 15:10:16,738 - INFO - Epoch: 286/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 15:10:17,097 - INFO - Epoch: 286/500, Iter: 6/119 -- train_loss: 1.1767 
2025-08-11 15:10:17,451 - INFO - Epoch: 286/500, Iter: 7/119 -- train_loss: 1.1749 
2025-08-11 15:10:17,793 - INFO - Epoch: 286/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 15:10:18,152 - INFO - Epoch: 286/500, Iter: 9/119 -- train_loss: 1.1593 
2025-08-11 15:10:21,222 - INFO - Epoch: 286/500, Iter: 10/119 -- train_loss: 1.1105 
2025-08-11 15:10:21,565 - INFO - Epoch: 286/500, Iter: 11/119 -- train_loss: 1.1740 
2025-08-11 15:10:25,668 - INFO - Epoch: 286/500, Iter: 12/119 -- train_loss: 1.1401 
2025-08-11 15:10:26,019 - INFO - Epoch: 286/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:14:41,154 - INFO - Epoch: 287/500, Iter: 1/119 -- train_loss: 1.1772 


[1/119]   1%|           [00:00<?]

2025-08-11 15:14:41,764 - INFO - Epoch: 287/500, Iter: 2/119 -- train_loss: 1.1706 
2025-08-11 15:14:44,455 - INFO - Epoch: 287/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 15:14:45,067 - INFO - Epoch: 287/500, Iter: 4/119 -- train_loss: 1.1638 
2025-08-11 15:14:47,324 - INFO - Epoch: 287/500, Iter: 5/119 -- train_loss: 1.1436 
2025-08-11 15:14:47,986 - INFO - Epoch: 287/500, Iter: 6/119 -- train_loss: 1.1739 
2025-08-11 15:14:48,843 - INFO - Epoch: 287/500, Iter: 7/119 -- train_loss: 1.0362 
2025-08-11 15:14:49,414 - INFO - Epoch: 287/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 15:14:54,897 - INFO - Epoch: 287/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 15:14:55,562 - INFO - Epoch: 287/500, Iter: 10/119 -- train_loss: 1.1661 
2025-08-11 15:14:57,782 - INFO - Epoch: 287/500, Iter: 11/119 -- train_loss: 1.0397 
2025-08-11 15:15:01,894 - INFO - Epoch: 287/500, Iter: 12/119 -- train_loss: 1.0671 
2025-08-11 15:15:02,464 - INFO - Epoch: 287/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:20:07,702 - INFO - Epoch: 288/500, Iter: 1/119 -- train_loss: 1.0749 


[1/119]   1%|           [00:00<?]

2025-08-11 15:20:10,489 - INFO - Epoch: 288/500, Iter: 2/119 -- train_loss: 1.0572 
2025-08-11 15:20:13,346 - INFO - Epoch: 288/500, Iter: 3/119 -- train_loss: 1.1538 
2025-08-11 15:20:13,712 - INFO - Epoch: 288/500, Iter: 4/119 -- train_loss: 1.1673 
2025-08-11 15:20:14,059 - INFO - Epoch: 288/500, Iter: 5/119 -- train_loss: 1.1380 
2025-08-11 15:20:14,406 - INFO - Epoch: 288/500, Iter: 6/119 -- train_loss: 1.1748 
2025-08-11 15:20:14,741 - INFO - Epoch: 288/500, Iter: 7/119 -- train_loss: 1.1606 
2025-08-11 15:20:15,098 - INFO - Epoch: 288/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 15:20:21,130 - INFO - Epoch: 288/500, Iter: 9/119 -- train_loss: 1.1660 
2025-08-11 15:20:21,468 - INFO - Epoch: 288/500, Iter: 10/119 -- train_loss: 0.9007 
2025-08-11 15:20:21,811 - INFO - Epoch: 288/500, Iter: 11/119 -- train_loss: 1.1803 
2025-08-11 15:20:22,158 - INFO - Epoch: 288/500, Iter: 12/119 -- train_loss: 1.1708 
2025-08-11 15:20:22,869 - INFO - Epoch: 288/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:24:35,472 - INFO - Epoch: 289/500, Iter: 1/119 -- train_loss: 1.1454 


[1/119]   1%|           [00:00<?]

2025-08-11 15:24:35,811 - INFO - Epoch: 289/500, Iter: 2/119 -- train_loss: 1.1748 
2025-08-11 15:24:36,106 - INFO - Epoch: 289/500, Iter: 3/119 -- train_loss: 1.1424 
2025-08-11 15:24:36,404 - INFO - Epoch: 289/500, Iter: 4/119 -- train_loss: 1.0717 
2025-08-11 15:24:36,751 - INFO - Epoch: 289/500, Iter: 5/119 -- train_loss: 1.0007 
2025-08-11 15:24:37,058 - INFO - Epoch: 289/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 15:24:37,386 - INFO - Epoch: 289/500, Iter: 7/119 -- train_loss: 1.1763 
2025-08-11 15:24:37,725 - INFO - Epoch: 289/500, Iter: 8/119 -- train_loss: 1.1709 
2025-08-11 15:24:41,986 - INFO - Epoch: 289/500, Iter: 9/119 -- train_loss: 1.1812 
2025-08-11 15:24:42,283 - INFO - Epoch: 289/500, Iter: 10/119 -- train_loss: 1.1810 
2025-08-11 15:24:42,599 - INFO - Epoch: 289/500, Iter: 11/119 -- train_loss: 1.1641 
2025-08-11 15:24:42,918 - INFO - Epoch: 289/500, Iter: 12/119 -- train_loss: 1.1785 
2025-08-11 15:24:43,883 - INFO - Epoch: 289/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:28:09,324 - INFO - Epoch: 290/500, Iter: 1/119 -- train_loss: 1.1534 


[1/119]   1%|           [00:00<?]

2025-08-11 15:28:09,657 - INFO - Epoch: 290/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 15:28:15,741 - INFO - Epoch: 290/500, Iter: 3/119 -- train_loss: 1.1704 
2025-08-11 15:28:16,067 - INFO - Epoch: 290/500, Iter: 4/119 -- train_loss: 1.1563 
2025-08-11 15:28:17,650 - INFO - Epoch: 290/500, Iter: 5/119 -- train_loss: 1.1670 
2025-08-11 15:28:17,941 - INFO - Epoch: 290/500, Iter: 6/119 -- train_loss: 1.1726 
2025-08-11 15:28:18,257 - INFO - Epoch: 290/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 15:28:18,567 - INFO - Epoch: 290/500, Iter: 8/119 -- train_loss: 1.1524 
2025-08-11 15:28:18,912 - INFO - Epoch: 290/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 15:28:19,186 - INFO - Epoch: 290/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 15:28:21,874 - INFO - Epoch: 290/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 15:28:26,540 - INFO - Epoch: 290/500, Iter: 12/119 -- train_loss: 1.1744 
2025-08-11 15:28:26,849 - INFO - Epoch: 290/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:32:08,932 - INFO - Epoch: 291/500, Iter: 1/119 -- train_loss: 1.0228 


[1/119]   1%|           [00:00<?]

2025-08-11 15:32:09,313 - INFO - Epoch: 291/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 15:32:13,841 - INFO - Epoch: 291/500, Iter: 3/119 -- train_loss: 1.1372 
2025-08-11 15:32:14,152 - INFO - Epoch: 291/500, Iter: 4/119 -- train_loss: 1.1175 
2025-08-11 15:32:14,456 - INFO - Epoch: 291/500, Iter: 5/119 -- train_loss: 1.1744 
2025-08-11 15:32:14,779 - INFO - Epoch: 291/500, Iter: 6/119 -- train_loss: 1.1495 
2025-08-11 15:32:15,077 - INFO - Epoch: 291/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 15:32:15,356 - INFO - Epoch: 291/500, Iter: 8/119 -- train_loss: 1.1670 
2025-08-11 15:32:17,767 - INFO - Epoch: 291/500, Iter: 9/119 -- train_loss: 1.1532 
2025-08-11 15:32:18,076 - INFO - Epoch: 291/500, Iter: 10/119 -- train_loss: 1.1719 
2025-08-11 15:32:25,063 - INFO - Epoch: 291/500, Iter: 11/119 -- train_loss: 1.1665 
2025-08-11 15:32:25,345 - INFO - Epoch: 291/500, Iter: 12/119 -- train_loss: 1.1755 
2025-08-11 15:32:25,622 - INFO - Epoch: 291/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:36:06,293 - INFO - Epoch: 292/500, Iter: 1/119 -- train_loss: 1.1530 


[1/119]   1%|           [00:00<?]

2025-08-11 15:36:06,603 - INFO - Epoch: 292/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 15:36:06,919 - INFO - Epoch: 292/500, Iter: 3/119 -- train_loss: 1.1336 
2025-08-11 15:36:07,228 - INFO - Epoch: 292/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 15:36:07,569 - INFO - Epoch: 292/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 15:36:07,856 - INFO - Epoch: 292/500, Iter: 6/119 -- train_loss: 1.1703 
2025-08-11 15:36:08,153 - INFO - Epoch: 292/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 15:36:08,436 - INFO - Epoch: 292/500, Iter: 8/119 -- train_loss: 1.1599 
2025-08-11 15:36:11,733 - INFO - Epoch: 292/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 15:36:12,052 - INFO - Epoch: 292/500, Iter: 10/119 -- train_loss: 1.1214 
2025-08-11 15:36:12,728 - INFO - Epoch: 292/500, Iter: 11/119 -- train_loss: 1.1750 
2025-08-11 15:36:13,045 - INFO - Epoch: 292/500, Iter: 12/119 -- train_loss: 1.1162 
2025-08-11 15:36:13,351 - INFO - Epoch: 292/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:39:56,158 - INFO - Epoch: 293/500, Iter: 1/119 -- train_loss: 1.1592 


[1/119]   1%|           [00:00<?]

2025-08-11 15:39:58,445 - INFO - Epoch: 293/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 15:39:58,758 - INFO - Epoch: 293/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 15:39:59,045 - INFO - Epoch: 293/500, Iter: 4/119 -- train_loss: 0.9287 
2025-08-11 15:39:59,318 - INFO - Epoch: 293/500, Iter: 5/119 -- train_loss: 1.1435 
2025-08-11 15:39:59,634 - INFO - Epoch: 293/500, Iter: 6/119 -- train_loss: 1.1745 
2025-08-11 15:39:59,925 - INFO - Epoch: 293/500, Iter: 7/119 -- train_loss: 1.1770 
2025-08-11 15:40:00,252 - INFO - Epoch: 293/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 15:40:02,138 - INFO - Epoch: 293/500, Iter: 9/119 -- train_loss: 1.1557 
2025-08-11 15:40:02,462 - INFO - Epoch: 293/500, Iter: 10/119 -- train_loss: 1.1681 
2025-08-11 15:40:03,146 - INFO - Epoch: 293/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 15:40:03,425 - INFO - Epoch: 293/500, Iter: 12/119 -- train_loss: 1.1152 
2025-08-11 15:40:03,725 - INFO - Epoch: 293/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:43:50,312 - INFO - Epoch: 294/500, Iter: 1/119 -- train_loss: 1.1627 


[1/119]   1%|           [00:00<?]

2025-08-11 15:43:51,274 - INFO - Epoch: 294/500, Iter: 2/119 -- train_loss: 1.1757 
2025-08-11 15:43:51,584 - INFO - Epoch: 294/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 15:43:51,858 - INFO - Epoch: 294/500, Iter: 4/119 -- train_loss: 1.1774 
2025-08-11 15:43:52,164 - INFO - Epoch: 294/500, Iter: 5/119 -- train_loss: 1.1492 
2025-08-11 15:43:52,427 - INFO - Epoch: 294/500, Iter: 6/119 -- train_loss: 1.0470 
2025-08-11 15:43:55,214 - INFO - Epoch: 294/500, Iter: 7/119 -- train_loss: 1.1767 
2025-08-11 15:43:59,263 - INFO - Epoch: 294/500, Iter: 8/119 -- train_loss: 1.1694 
2025-08-11 15:43:59,551 - INFO - Epoch: 294/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 15:43:59,841 - INFO - Epoch: 294/500, Iter: 10/119 -- train_loss: 1.1742 
2025-08-11 15:44:00,122 - INFO - Epoch: 294/500, Iter: 11/119 -- train_loss: 1.1730 
2025-08-11 15:44:00,393 - INFO - Epoch: 294/500, Iter: 12/119 -- train_loss: 1.1308 
2025-08-11 15:44:01,296 - INFO - Epoch: 294/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:47:35,595 - INFO - Epoch: 295/500, Iter: 1/119 -- train_loss: 1.1639 


[1/119]   1%|           [00:00<?]

2025-08-11 15:47:41,564 - INFO - Epoch: 295/500, Iter: 2/119 -- train_loss: 1.1750 
2025-08-11 15:47:41,814 - INFO - Epoch: 295/500, Iter: 3/119 -- train_loss: 1.1170 
2025-08-11 15:47:42,108 - INFO - Epoch: 295/500, Iter: 4/119 -- train_loss: 1.1702 
2025-08-11 15:47:42,414 - INFO - Epoch: 295/500, Iter: 5/119 -- train_loss: 1.1142 
2025-08-11 15:47:42,737 - INFO - Epoch: 295/500, Iter: 6/119 -- train_loss: 1.0581 
2025-08-11 15:47:43,014 - INFO - Epoch: 295/500, Iter: 7/119 -- train_loss: 1.1128 
2025-08-11 15:47:43,359 - INFO - Epoch: 295/500, Iter: 8/119 -- train_loss: 1.1721 
2025-08-11 15:47:47,587 - INFO - Epoch: 295/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 15:47:47,888 - INFO - Epoch: 295/500, Iter: 10/119 -- train_loss: 1.1699 
2025-08-11 15:47:48,191 - INFO - Epoch: 295/500, Iter: 11/119 -- train_loss: 1.1754 
2025-08-11 15:47:48,487 - INFO - Epoch: 295/500, Iter: 12/119 -- train_loss: 1.1610 
2025-08-11 15:47:48,841 - INFO - Epoch: 295/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:51:23,561 - INFO - Epoch: 296/500, Iter: 1/119 -- train_loss: 1.1725 


[1/119]   1%|           [00:00<?]

2025-08-11 15:51:26,844 - INFO - Epoch: 296/500, Iter: 2/119 -- train_loss: 1.1735 
2025-08-11 15:51:27,137 - INFO - Epoch: 296/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 15:51:27,428 - INFO - Epoch: 296/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 15:51:27,704 - INFO - Epoch: 296/500, Iter: 5/119 -- train_loss: 1.1229 
2025-08-11 15:51:27,994 - INFO - Epoch: 296/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 15:51:28,310 - INFO - Epoch: 296/500, Iter: 7/119 -- train_loss: 1.0263 
2025-08-11 15:51:29,148 - INFO - Epoch: 296/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 15:51:29,444 - INFO - Epoch: 296/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 15:51:38,572 - INFO - Epoch: 296/500, Iter: 10/119 -- train_loss: 0.9347 
2025-08-11 15:51:38,865 - INFO - Epoch: 296/500, Iter: 11/119 -- train_loss: 1.1760 
2025-08-11 15:51:39,175 - INFO - Epoch: 296/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 15:51:39,461 - INFO - Epoch: 296/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:55:12,135 - INFO - Epoch: 297/500, Iter: 1/119 -- train_loss: 1.1890 


[1/119]   1%|           [00:00<?]

2025-08-11 15:55:12,441 - INFO - Epoch: 297/500, Iter: 2/119 -- train_loss: 1.1726 
2025-08-11 15:55:12,718 - INFO - Epoch: 297/500, Iter: 3/119 -- train_loss: 1.1744 
2025-08-11 15:55:13,035 - INFO - Epoch: 297/500, Iter: 4/119 -- train_loss: 1.1372 
2025-08-11 15:55:13,344 - INFO - Epoch: 297/500, Iter: 5/119 -- train_loss: 1.1320 
2025-08-11 15:55:13,642 - INFO - Epoch: 297/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 15:55:14,943 - INFO - Epoch: 297/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 15:55:15,251 - INFO - Epoch: 297/500, Iter: 8/119 -- train_loss: 0.9659 
2025-08-11 15:55:23,858 - INFO - Epoch: 297/500, Iter: 9/119 -- train_loss: 1.1747 
2025-08-11 15:55:24,136 - INFO - Epoch: 297/500, Iter: 10/119 -- train_loss: 1.1428 
2025-08-11 15:55:24,425 - INFO - Epoch: 297/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 15:55:24,750 - INFO - Epoch: 297/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 15:55:25,061 - INFO - Epoch: 297/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 15:58:55,033 - INFO - Epoch: 298/500, Iter: 1/119 -- train_loss: 1.1750 


[1/119]   1%|           [00:00<?]

2025-08-11 15:58:55,341 - INFO - Epoch: 298/500, Iter: 2/119 -- train_loss: 1.1700 
2025-08-11 15:58:55,667 - INFO - Epoch: 298/500, Iter: 3/119 -- train_loss: 1.1705 
2025-08-11 15:58:55,986 - INFO - Epoch: 298/500, Iter: 4/119 -- train_loss: 1.1725 
2025-08-11 15:58:56,308 - INFO - Epoch: 298/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 15:58:56,617 - INFO - Epoch: 298/500, Iter: 6/119 -- train_loss: 1.1560 
2025-08-11 15:58:58,001 - INFO - Epoch: 298/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 15:58:58,291 - INFO - Epoch: 298/500, Iter: 8/119 -- train_loss: 1.1571 
2025-08-11 15:59:00,578 - INFO - Epoch: 298/500, Iter: 9/119 -- train_loss: 1.1390 
2025-08-11 15:59:00,922 - INFO - Epoch: 298/500, Iter: 10/119 -- train_loss: 1.1380 
2025-08-11 15:59:01,224 - INFO - Epoch: 298/500, Iter: 11/119 -- train_loss: 1.1342 
2025-08-11 15:59:01,500 - INFO - Epoch: 298/500, Iter: 12/119 -- train_loss: 1.1384 
2025-08-11 15:59:01,817 - INFO - Epoch: 298/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:02:37,165 - INFO - Epoch: 299/500, Iter: 1/119 -- train_loss: 1.1746 


[1/119]   1%|           [00:00<?]

2025-08-11 16:02:40,674 - INFO - Epoch: 299/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 16:02:40,957 - INFO - Epoch: 299/500, Iter: 3/119 -- train_loss: 1.1698 
2025-08-11 16:02:41,280 - INFO - Epoch: 299/500, Iter: 4/119 -- train_loss: 1.1645 
2025-08-11 16:02:41,587 - INFO - Epoch: 299/500, Iter: 5/119 -- train_loss: 1.1616 
2025-08-11 16:02:41,887 - INFO - Epoch: 299/500, Iter: 6/119 -- train_loss: 1.0983 
2025-08-11 16:02:42,174 - INFO - Epoch: 299/500, Iter: 7/119 -- train_loss: 1.0314 
2025-08-11 16:02:42,464 - INFO - Epoch: 299/500, Iter: 8/119 -- train_loss: 1.1211 
2025-08-11 16:02:42,753 - INFO - Epoch: 299/500, Iter: 9/119 -- train_loss: 1.1285 
2025-08-11 16:02:48,117 - INFO - Epoch: 299/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 16:02:48,444 - INFO - Epoch: 299/500, Iter: 11/119 -- train_loss: 1.1350 
2025-08-11 16:02:50,451 - INFO - Epoch: 299/500, Iter: 12/119 -- train_loss: 1.1698 
2025-08-11 16:02:50,724 - INFO - Epoch: 299/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:06:33,519 - INFO - Epoch: 300/500, Iter: 1/119 -- train_loss: 1.1677 


[1/119]   1%|           [00:00<?]

2025-08-11 16:06:33,822 - INFO - Epoch: 300/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 16:06:34,120 - INFO - Epoch: 300/500, Iter: 3/119 -- train_loss: 1.1587 
2025-08-11 16:06:34,442 - INFO - Epoch: 300/500, Iter: 4/119 -- train_loss: 1.1688 
2025-08-11 16:06:34,718 - INFO - Epoch: 300/500, Iter: 5/119 -- train_loss: 1.1321 
2025-08-11 16:06:35,042 - INFO - Epoch: 300/500, Iter: 6/119 -- train_loss: 1.1521 
2025-08-11 16:06:39,830 - INFO - Epoch: 300/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 16:06:40,127 - INFO - Epoch: 300/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 16:06:41,073 - INFO - Epoch: 300/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 16:06:41,360 - INFO - Epoch: 300/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 16:06:41,689 - INFO - Epoch: 300/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 16:06:42,000 - INFO - Epoch: 300/500, Iter: 12/119 -- train_loss: 1.1491 
2025-08-11 16:06:42,329 - INFO - Epoch: 300/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:10:29,241 - INFO - Epoch: 301/500, Iter: 1/119 -- train_loss: 1.1479 


[1/119]   1%|           [00:00<?]

2025-08-11 16:10:31,950 - INFO - Epoch: 301/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 16:10:32,289 - INFO - Epoch: 301/500, Iter: 3/119 -- train_loss: 1.1745 
2025-08-11 16:10:32,629 - INFO - Epoch: 301/500, Iter: 4/119 -- train_loss: 1.1652 
2025-08-11 16:10:32,949 - INFO - Epoch: 301/500, Iter: 5/119 -- train_loss: 1.1745 
2025-08-11 16:10:33,279 - INFO - Epoch: 301/500, Iter: 6/119 -- train_loss: 1.0708 
2025-08-11 16:10:33,630 - INFO - Epoch: 301/500, Iter: 7/119 -- train_loss: 1.1581 
2025-08-11 16:10:36,715 - INFO - Epoch: 301/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 16:10:37,067 - INFO - Epoch: 301/500, Iter: 9/119 -- train_loss: 1.0838 
2025-08-11 16:10:43,801 - INFO - Epoch: 301/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-11 16:10:44,136 - INFO - Epoch: 301/500, Iter: 11/119 -- train_loss: 1.1714 
2025-08-11 16:10:44,474 - INFO - Epoch: 301/500, Iter: 12/119 -- train_loss: 1.0896 
2025-08-11 16:10:44,816 - INFO - Epoch: 301/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:15:25,326 - INFO - Epoch: 302/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 16:15:25,643 - INFO - Epoch: 302/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 16:15:25,977 - INFO - Epoch: 302/500, Iter: 3/119 -- train_loss: 1.1709 
2025-08-11 16:15:26,323 - INFO - Epoch: 302/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-11 16:15:27,385 - INFO - Epoch: 302/500, Iter: 5/119 -- train_loss: 1.1304 
2025-08-11 16:15:27,751 - INFO - Epoch: 302/500, Iter: 6/119 -- train_loss: 1.1095 
2025-08-11 16:15:28,057 - INFO - Epoch: 302/500, Iter: 7/119 -- train_loss: 1.1485 
2025-08-11 16:15:28,402 - INFO - Epoch: 302/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 16:15:30,224 - INFO - Epoch: 302/500, Iter: 9/119 -- train_loss: 1.1216 
2025-08-11 16:15:30,561 - INFO - Epoch: 302/500, Iter: 10/119 -- train_loss: 1.1564 
2025-08-11 16:15:34,286 - INFO - Epoch: 302/500, Iter: 11/119 -- train_loss: 1.1530 
2025-08-11 16:15:34,612 - INFO - Epoch: 302/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 16:15:34,948 - INFO - Epoch: 302/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:20:04,726 - INFO - Epoch: 303/500, Iter: 1/119 -- train_loss: 1.1708 


[1/119]   1%|           [00:00<?]

2025-08-11 16:20:05,571 - INFO - Epoch: 303/500, Iter: 2/119 -- train_loss: 1.1681 
2025-08-11 16:20:11,919 - INFO - Epoch: 303/500, Iter: 3/119 -- train_loss: 1.0690 
2025-08-11 16:20:12,357 - INFO - Epoch: 303/500, Iter: 4/119 -- train_loss: 1.1657 
2025-08-11 16:20:12,789 - INFO - Epoch: 303/500, Iter: 5/119 -- train_loss: 1.1766 
2025-08-11 16:20:13,244 - INFO - Epoch: 303/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 16:20:13,773 - INFO - Epoch: 303/500, Iter: 7/119 -- train_loss: 1.0899 
2025-08-11 16:20:14,315 - INFO - Epoch: 303/500, Iter: 8/119 -- train_loss: 1.1519 
2025-08-11 16:20:33,327 - INFO - Epoch: 303/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 16:20:33,730 - INFO - Epoch: 303/500, Iter: 10/119 -- train_loss: 1.1718 
2025-08-11 16:20:34,120 - INFO - Epoch: 303/500, Iter: 11/119 -- train_loss: 1.1143 
2025-08-11 16:20:34,523 - INFO - Epoch: 303/500, Iter: 12/119 -- train_loss: 1.1795 
2025-08-11 16:20:35,026 - INFO - Epoch: 303/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:25:09,382 - INFO - Epoch: 304/500, Iter: 1/119 -- train_loss: 1.1307 


[1/119]   1%|           [00:00<?]

2025-08-11 16:25:09,653 - INFO - Epoch: 304/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 16:25:09,965 - INFO - Epoch: 304/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 16:25:10,264 - INFO - Epoch: 304/500, Iter: 4/119 -- train_loss: 1.1678 
2025-08-11 16:25:10,579 - INFO - Epoch: 304/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 16:25:12,000 - INFO - Epoch: 304/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 16:25:12,260 - INFO - Epoch: 304/500, Iter: 7/119 -- train_loss: 1.1508 
2025-08-11 16:25:19,761 - INFO - Epoch: 304/500, Iter: 8/119 -- train_loss: 1.1654 
2025-08-11 16:25:20,047 - INFO - Epoch: 304/500, Iter: 9/119 -- train_loss: 1.1642 
2025-08-11 16:25:20,293 - INFO - Epoch: 304/500, Iter: 10/119 -- train_loss: 1.1721 
2025-08-11 16:25:20,564 - INFO - Epoch: 304/500, Iter: 11/119 -- train_loss: 1.1580 
2025-08-11 16:25:20,822 - INFO - Epoch: 304/500, Iter: 12/119 -- train_loss: 1.1357 
2025-08-11 16:25:21,069 - INFO - Epoch: 304/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:28:45,861 - INFO - Epoch: 305/500, Iter: 1/119 -- train_loss: 1.1597 


[1/119]   1%|           [00:00<?]

2025-08-11 16:28:46,142 - INFO - Epoch: 305/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 16:28:46,432 - INFO - Epoch: 305/500, Iter: 3/119 -- train_loss: 1.1639 
2025-08-11 16:28:46,684 - INFO - Epoch: 305/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 16:28:46,956 - INFO - Epoch: 305/500, Iter: 5/119 -- train_loss: 1.1314 
2025-08-11 16:28:47,239 - INFO - Epoch: 305/500, Iter: 6/119 -- train_loss: 1.1675 
2025-08-11 16:28:47,522 - INFO - Epoch: 305/500, Iter: 7/119 -- train_loss: 1.1606 
2025-08-11 16:28:47,803 - INFO - Epoch: 305/500, Iter: 8/119 -- train_loss: 1.1720 
2025-08-11 16:28:55,440 - INFO - Epoch: 305/500, Iter: 9/119 -- train_loss: 1.1589 
2025-08-11 16:28:55,676 - INFO - Epoch: 305/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 16:28:55,939 - INFO - Epoch: 305/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 16:28:56,182 - INFO - Epoch: 305/500, Iter: 12/119 -- train_loss: 1.1644 
2025-08-11 16:28:56,435 - INFO - Epoch: 305/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:32:21,660 - INFO - Epoch: 306/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 16:32:21,962 - INFO - Epoch: 306/500, Iter: 2/119 -- train_loss: 1.1832 
2025-08-11 16:32:22,858 - INFO - Epoch: 306/500, Iter: 3/119 -- train_loss: 1.1768 
2025-08-11 16:32:23,190 - INFO - Epoch: 306/500, Iter: 4/119 -- train_loss: 1.1126 
2025-08-11 16:32:23,473 - INFO - Epoch: 306/500, Iter: 5/119 -- train_loss: 1.1743 
2025-08-11 16:32:23,770 - INFO - Epoch: 306/500, Iter: 6/119 -- train_loss: 1.1417 
2025-08-11 16:32:24,072 - INFO - Epoch: 306/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 16:32:24,326 - INFO - Epoch: 306/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 16:32:30,241 - INFO - Epoch: 306/500, Iter: 9/119 -- train_loss: 1.1743 
2025-08-11 16:32:30,525 - INFO - Epoch: 306/500, Iter: 10/119 -- train_loss: 1.1747 
2025-08-11 16:32:30,809 - INFO - Epoch: 306/500, Iter: 11/119 -- train_loss: 1.1509 
2025-08-11 16:32:31,113 - INFO - Epoch: 306/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 16:32:31,413 - INFO - Epoch: 306/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:35:53,414 - INFO - Epoch: 307/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 16:35:53,713 - INFO - Epoch: 307/500, Iter: 2/119 -- train_loss: 1.1264 
2025-08-11 16:35:53,969 - INFO - Epoch: 307/500, Iter: 3/119 -- train_loss: 1.1777 
2025-08-11 16:35:54,232 - INFO - Epoch: 307/500, Iter: 4/119 -- train_loss: 1.1792 
2025-08-11 16:35:54,538 - INFO - Epoch: 307/500, Iter: 5/119 -- train_loss: 1.1693 
2025-08-11 16:35:55,620 - INFO - Epoch: 307/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 16:35:55,889 - INFO - Epoch: 307/500, Iter: 7/119 -- train_loss: 1.1820 
2025-08-11 16:35:59,454 - INFO - Epoch: 307/500, Iter: 8/119 -- train_loss: 1.1682 
2025-08-11 16:35:59,739 - INFO - Epoch: 307/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 16:36:00,001 - INFO - Epoch: 307/500, Iter: 10/119 -- train_loss: 1.1683 
2025-08-11 16:36:00,275 - INFO - Epoch: 307/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 16:36:00,556 - INFO - Epoch: 307/500, Iter: 12/119 -- train_loss: 1.1445 
2025-08-11 16:36:00,807 - INFO - Epoch: 307/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:39:19,910 - INFO - Epoch: 308/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 16:39:20,997 - INFO - Epoch: 308/500, Iter: 2/119 -- train_loss: 1.1349 
2025-08-11 16:39:25,248 - INFO - Epoch: 308/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 16:39:25,532 - INFO - Epoch: 308/500, Iter: 4/119 -- train_loss: 1.1436 
2025-08-11 16:39:25,825 - INFO - Epoch: 308/500, Iter: 5/119 -- train_loss: 1.1561 
2025-08-11 16:39:26,122 - INFO - Epoch: 308/500, Iter: 6/119 -- train_loss: 1.1531 
2025-08-11 16:39:26,972 - INFO - Epoch: 308/500, Iter: 7/119 -- train_loss: 1.1243 
2025-08-11 16:39:27,316 - INFO - Epoch: 308/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 16:39:27,610 - INFO - Epoch: 308/500, Iter: 9/119 -- train_loss: 1.1749 
2025-08-11 16:39:27,873 - INFO - Epoch: 308/500, Iter: 10/119 -- train_loss: 1.0111 
2025-08-11 16:39:29,449 - INFO - Epoch: 308/500, Iter: 11/119 -- train_loss: 1.1723 
2025-08-11 16:39:33,666 - INFO - Epoch: 308/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 16:39:33,912 - INFO - Epoch: 308/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:42:55,197 - INFO - Epoch: 309/500, Iter: 1/119 -- train_loss: 1.0283 


[1/119]   1%|           [00:00<?]

2025-08-11 16:42:55,438 - INFO - Epoch: 309/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 16:42:57,071 - INFO - Epoch: 309/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 16:42:59,412 - INFO - Epoch: 309/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 16:42:59,704 - INFO - Epoch: 309/500, Iter: 5/119 -- train_loss: 1.1440 
2025-08-11 16:43:00,014 - INFO - Epoch: 309/500, Iter: 6/119 -- train_loss: 1.1484 
2025-08-11 16:43:00,262 - INFO - Epoch: 309/500, Iter: 7/119 -- train_loss: 1.1822 
2025-08-11 16:43:00,559 - INFO - Epoch: 309/500, Iter: 8/119 -- train_loss: 1.1685 
2025-08-11 16:43:01,402 - INFO - Epoch: 309/500, Iter: 9/119 -- train_loss: 1.1389 
2025-08-11 16:43:01,696 - INFO - Epoch: 309/500, Iter: 10/119 -- train_loss: 1.1831 
2025-08-11 16:43:04,008 - INFO - Epoch: 309/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 16:43:05,827 - INFO - Epoch: 309/500, Iter: 12/119 -- train_loss: 1.1746 
2025-08-11 16:43:06,106 - INFO - Epoch: 309/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:46:19,978 - INFO - Epoch: 310/500, Iter: 1/119 -- train_loss: 1.1283 


[1/119]   1%|           [00:00<?]

2025-08-11 16:46:26,966 - INFO - Epoch: 310/500, Iter: 2/119 -- train_loss: 1.1491 
2025-08-11 16:46:27,244 - INFO - Epoch: 310/500, Iter: 3/119 -- train_loss: 1.0103 
2025-08-11 16:46:27,540 - INFO - Epoch: 310/500, Iter: 4/119 -- train_loss: 1.1714 
2025-08-11 16:46:27,842 - INFO - Epoch: 310/500, Iter: 5/119 -- train_loss: 1.1205 
2025-08-11 16:46:28,134 - INFO - Epoch: 310/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 16:46:28,437 - INFO - Epoch: 310/500, Iter: 7/119 -- train_loss: 1.1468 
2025-08-11 16:46:28,724 - INFO - Epoch: 310/500, Iter: 8/119 -- train_loss: 1.1502 
2025-08-11 16:46:29,032 - INFO - Epoch: 310/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 16:46:32,081 - INFO - Epoch: 310/500, Iter: 10/119 -- train_loss: 1.1744 
2025-08-11 16:46:32,375 - INFO - Epoch: 310/500, Iter: 11/119 -- train_loss: 1.0552 
2025-08-11 16:46:34,598 - INFO - Epoch: 310/500, Iter: 12/119 -- train_loss: 1.1164 
2025-08-11 16:46:34,891 - INFO - Epoch: 310/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:49:51,555 - INFO - Epoch: 311/500, Iter: 1/119 -- train_loss: 1.1495 


[1/119]   1%|           [00:00<?]

2025-08-11 16:49:51,812 - INFO - Epoch: 311/500, Iter: 2/119 -- train_loss: 1.1643 
2025-08-11 16:49:52,071 - INFO - Epoch: 311/500, Iter: 3/119 -- train_loss: 1.1695 
2025-08-11 16:49:55,221 - INFO - Epoch: 311/500, Iter: 4/119 -- train_loss: 1.0902 
2025-08-11 16:49:55,480 - INFO - Epoch: 311/500, Iter: 5/119 -- train_loss: 1.1367 
2025-08-11 16:49:59,270 - INFO - Epoch: 311/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 16:49:59,531 - INFO - Epoch: 311/500, Iter: 7/119 -- train_loss: 1.1585 
2025-08-11 16:49:59,773 - INFO - Epoch: 311/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 16:50:00,049 - INFO - Epoch: 311/500, Iter: 9/119 -- train_loss: 1.1132 
2025-08-11 16:50:00,301 - INFO - Epoch: 311/500, Iter: 10/119 -- train_loss: 1.1083 
2025-08-11 16:50:00,548 - INFO - Epoch: 311/500, Iter: 11/119 -- train_loss: 1.1700 
2025-08-11 16:50:00,820 - INFO - Epoch: 311/500, Iter: 12/119 -- train_loss: 1.1652 
2025-08-11 16:50:01,074 - INFO - Epoch: 311/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:53:56,186 - INFO - Epoch: 312/500, Iter: 1/119 -- train_loss: 1.1755 


[1/119]   1%|           [00:00<?]

2025-08-11 16:53:56,433 - INFO - Epoch: 312/500, Iter: 2/119 -- train_loss: 1.1756 
2025-08-11 16:53:56,696 - INFO - Epoch: 312/500, Iter: 3/119 -- train_loss: 1.1385 
2025-08-11 16:53:56,957 - INFO - Epoch: 312/500, Iter: 4/119 -- train_loss: 1.1727 
2025-08-11 16:53:57,217 - INFO - Epoch: 312/500, Iter: 5/119 -- train_loss: 1.1494 
2025-08-11 16:53:57,469 - INFO - Epoch: 312/500, Iter: 6/119 -- train_loss: 1.1807 
2025-08-11 16:54:00,924 - INFO - Epoch: 312/500, Iter: 7/119 -- train_loss: 1.1756 
2025-08-11 16:54:01,174 - INFO - Epoch: 312/500, Iter: 8/119 -- train_loss: 1.1739 
2025-08-11 16:54:02,539 - INFO - Epoch: 312/500, Iter: 9/119 -- train_loss: 1.1746 
2025-08-11 16:54:05,323 - INFO - Epoch: 312/500, Iter: 10/119 -- train_loss: 1.1752 
2025-08-11 16:54:05,589 - INFO - Epoch: 312/500, Iter: 11/119 -- train_loss: 1.1048 
2025-08-11 16:54:05,828 - INFO - Epoch: 312/500, Iter: 12/119 -- train_loss: 1.1743 
2025-08-11 16:54:06,090 - INFO - Epoch: 312/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 16:57:42,180 - INFO - Epoch: 313/500, Iter: 1/119 -- train_loss: 1.1721 


[1/119]   1%|           [00:00<?]

2025-08-11 16:57:42,427 - INFO - Epoch: 313/500, Iter: 2/119 -- train_loss: 1.1328 
2025-08-11 16:57:42,710 - INFO - Epoch: 313/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 16:57:42,951 - INFO - Epoch: 313/500, Iter: 4/119 -- train_loss: 1.0993 
2025-08-11 16:57:43,183 - INFO - Epoch: 313/500, Iter: 5/119 -- train_loss: 1.1454 
2025-08-11 16:57:43,422 - INFO - Epoch: 313/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 16:57:43,671 - INFO - Epoch: 313/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 16:57:43,904 - INFO - Epoch: 313/500, Iter: 8/119 -- train_loss: 1.0919 
2025-08-11 16:57:50,400 - INFO - Epoch: 313/500, Iter: 9/119 -- train_loss: 1.1749 
2025-08-11 16:57:50,637 - INFO - Epoch: 313/500, Iter: 10/119 -- train_loss: 1.1491 
2025-08-11 16:57:50,873 - INFO - Epoch: 313/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 16:57:51,110 - INFO - Epoch: 313/500, Iter: 12/119 -- train_loss: 1.1704 
2025-08-11 16:57:51,350 - INFO - Epoch: 313/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:01:19,095 - INFO - Epoch: 314/500, Iter: 1/119 -- train_loss: 1.1749 


[1/119]   1%|           [00:00<?]

2025-08-11 17:01:19,378 - INFO - Epoch: 314/500, Iter: 2/119 -- train_loss: 1.0894 
2025-08-11 17:01:21,791 - INFO - Epoch: 314/500, Iter: 3/119 -- train_loss: 1.1676 
2025-08-11 17:01:22,063 - INFO - Epoch: 314/500, Iter: 4/119 -- train_loss: 1.0717 
2025-08-11 17:01:22,338 - INFO - Epoch: 314/500, Iter: 5/119 -- train_loss: 1.1730 
2025-08-11 17:01:22,587 - INFO - Epoch: 314/500, Iter: 6/119 -- train_loss: 1.1629 
2025-08-11 17:01:27,873 - INFO - Epoch: 314/500, Iter: 7/119 -- train_loss: 1.1543 
2025-08-11 17:01:28,143 - INFO - Epoch: 314/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 17:01:29,345 - INFO - Epoch: 314/500, Iter: 9/119 -- train_loss: 1.1725 
2025-08-11 17:01:29,597 - INFO - Epoch: 314/500, Iter: 10/119 -- train_loss: 1.1485 
2025-08-11 17:01:29,844 - INFO - Epoch: 314/500, Iter: 11/119 -- train_loss: 1.1786 
2025-08-11 17:01:30,112 - INFO - Epoch: 314/500, Iter: 12/119 -- train_loss: 1.1674 
2025-08-11 17:01:30,364 - INFO - Epoch: 314/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:05:08,042 - INFO - Epoch: 315/500, Iter: 1/119 -- train_loss: 1.1477 


[1/119]   1%|           [00:00<?]

2025-08-11 17:05:10,758 - INFO - Epoch: 315/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 17:05:11,150 - INFO - Epoch: 315/500, Iter: 3/119 -- train_loss: 1.1628 
2025-08-11 17:05:11,425 - INFO - Epoch: 315/500, Iter: 4/119 -- train_loss: 1.1744 
2025-08-11 17:05:11,677 - INFO - Epoch: 315/500, Iter: 5/119 -- train_loss: 1.0286 
2025-08-11 17:05:11,928 - INFO - Epoch: 315/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 17:05:12,159 - INFO - Epoch: 315/500, Iter: 7/119 -- train_loss: 1.1049 
2025-08-11 17:05:12,411 - INFO - Epoch: 315/500, Iter: 8/119 -- train_loss: 1.1494 
2025-08-11 17:05:13,383 - INFO - Epoch: 315/500, Iter: 9/119 -- train_loss: 1.1752 
2025-08-11 17:05:15,306 - INFO - Epoch: 315/500, Iter: 10/119 -- train_loss: 1.1687 
2025-08-11 17:05:21,068 - INFO - Epoch: 315/500, Iter: 11/119 -- train_loss: 1.1728 
2025-08-11 17:05:21,307 - INFO - Epoch: 315/500, Iter: 12/119 -- train_loss: 1.1731 
2025-08-11 17:05:21,552 - INFO - Epoch: 315/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:08:55,002 - INFO - Epoch: 316/500, Iter: 1/119 -- train_loss: 1.1611 


[1/119]   1%|           [00:00<?]

2025-08-11 17:08:55,264 - INFO - Epoch: 316/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 17:08:55,508 - INFO - Epoch: 316/500, Iter: 3/119 -- train_loss: 1.0842 
2025-08-11 17:08:55,774 - INFO - Epoch: 316/500, Iter: 4/119 -- train_loss: 1.0561 
2025-08-11 17:08:56,021 - INFO - Epoch: 316/500, Iter: 5/119 -- train_loss: 1.1191 
2025-08-11 17:08:56,309 - INFO - Epoch: 316/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 17:08:56,558 - INFO - Epoch: 316/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 17:08:56,809 - INFO - Epoch: 316/500, Iter: 8/119 -- train_loss: 1.0369 
2025-08-11 17:08:59,862 - INFO - Epoch: 316/500, Iter: 9/119 -- train_loss: 1.1543 
2025-08-11 17:09:00,130 - INFO - Epoch: 316/500, Iter: 10/119 -- train_loss: 1.1724 
2025-08-11 17:09:00,362 - INFO - Epoch: 316/500, Iter: 11/119 -- train_loss: 1.1491 
2025-08-11 17:09:01,240 - INFO - Epoch: 316/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 17:09:04,843 - INFO - Epoch: 316/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:12:32,677 - INFO - Epoch: 317/500, Iter: 1/119 -- train_loss: 1.1631 


[1/119]   1%|           [00:00<?]

2025-08-11 17:12:34,351 - INFO - Epoch: 317/500, Iter: 2/119 -- train_loss: 1.1745 
2025-08-11 17:12:34,614 - INFO - Epoch: 317/500, Iter: 3/119 -- train_loss: 1.1683 
2025-08-11 17:12:34,935 - INFO - Epoch: 317/500, Iter: 4/119 -- train_loss: 1.1753 
2025-08-11 17:12:35,226 - INFO - Epoch: 317/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 17:12:35,546 - INFO - Epoch: 317/500, Iter: 6/119 -- train_loss: 1.1597 
2025-08-11 17:12:35,851 - INFO - Epoch: 317/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 17:12:36,191 - INFO - Epoch: 317/500, Iter: 8/119 -- train_loss: 1.1047 
2025-08-11 17:12:42,719 - INFO - Epoch: 317/500, Iter: 9/119 -- train_loss: 1.1740 
2025-08-11 17:12:43,010 - INFO - Epoch: 317/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 17:12:43,324 - INFO - Epoch: 317/500, Iter: 11/119 -- train_loss: 1.1573 
2025-08-11 17:12:43,621 - INFO - Epoch: 317/500, Iter: 12/119 -- train_loss: 1.1746 
2025-08-11 17:12:43,933 - INFO - Epoch: 317/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:16:17,818 - INFO - Epoch: 318/500, Iter: 1/119 -- train_loss: 1.1416 


[1/119]   1%|           [00:00<?]

2025-08-11 17:16:18,121 - INFO - Epoch: 318/500, Iter: 2/119 -- train_loss: 1.1696 
2025-08-11 17:16:18,405 - INFO - Epoch: 318/500, Iter: 3/119 -- train_loss: 1.1741 
2025-08-11 17:16:18,717 - INFO - Epoch: 318/500, Iter: 4/119 -- train_loss: 1.1674 
2025-08-11 17:16:18,957 - INFO - Epoch: 318/500, Iter: 5/119 -- train_loss: 1.1333 
2025-08-11 17:16:19,254 - INFO - Epoch: 318/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 17:16:19,553 - INFO - Epoch: 318/500, Iter: 7/119 -- train_loss: 1.1418 
2025-08-11 17:16:19,847 - INFO - Epoch: 318/500, Iter: 8/119 -- train_loss: 1.1381 
2025-08-11 17:16:26,585 - INFO - Epoch: 318/500, Iter: 9/119 -- train_loss: 1.1714 
2025-08-11 17:16:26,836 - INFO - Epoch: 318/500, Iter: 10/119 -- train_loss: 1.1485 
2025-08-11 17:16:27,094 - INFO - Epoch: 318/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 17:16:27,334 - INFO - Epoch: 318/500, Iter: 12/119 -- train_loss: 1.0962 
2025-08-11 17:16:27,592 - INFO - Epoch: 318/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:19:40,762 - INFO - Epoch: 319/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 17:19:43,778 - INFO - Epoch: 319/500, Iter: 2/119 -- train_loss: 1.1703 
2025-08-11 17:19:45,040 - INFO - Epoch: 319/500, Iter: 3/119 -- train_loss: 1.1188 
2025-08-11 17:19:45,314 - INFO - Epoch: 319/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 17:19:45,569 - INFO - Epoch: 319/500, Iter: 5/119 -- train_loss: 1.1450 
2025-08-11 17:19:45,876 - INFO - Epoch: 319/500, Iter: 6/119 -- train_loss: 1.1744 
2025-08-11 17:19:46,144 - INFO - Epoch: 319/500, Iter: 7/119 -- train_loss: 1.1669 
2025-08-11 17:19:46,406 - INFO - Epoch: 319/500, Iter: 8/119 -- train_loss: 1.1039 
2025-08-11 17:19:46,702 - INFO - Epoch: 319/500, Iter: 9/119 -- train_loss: 1.1742 
2025-08-11 17:19:49,094 - INFO - Epoch: 319/500, Iter: 10/119 -- train_loss: 1.1746 
2025-08-11 17:19:55,937 - INFO - Epoch: 319/500, Iter: 11/119 -- train_loss: 1.1754 
2025-08-11 17:19:56,184 - INFO - Epoch: 319/500, Iter: 12/119 -- train_loss: 1.1700 
2025-08-11 17:19:56,441 - INFO - Epoch: 319/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:23:01,910 - INFO - Epoch: 320/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 17:23:02,808 - INFO - Epoch: 320/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 17:23:05,233 - INFO - Epoch: 320/500, Iter: 3/119 -- train_loss: 1.1478 
2025-08-11 17:23:05,572 - INFO - Epoch: 320/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 17:23:05,876 - INFO - Epoch: 320/500, Iter: 5/119 -- train_loss: 1.1716 
2025-08-11 17:23:06,302 - INFO - Epoch: 320/500, Iter: 6/119 -- train_loss: 1.1768 
2025-08-11 17:23:06,586 - INFO - Epoch: 320/500, Iter: 7/119 -- train_loss: 1.1414 
2025-08-11 17:23:06,888 - INFO - Epoch: 320/500, Iter: 8/119 -- train_loss: 1.1729 
2025-08-11 17:23:07,201 - INFO - Epoch: 320/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 17:23:07,581 - INFO - Epoch: 320/500, Iter: 10/119 -- train_loss: 1.1569 
2025-08-11 17:23:10,416 - INFO - Epoch: 320/500, Iter: 11/119 -- train_loss: 1.1743 
2025-08-11 17:23:16,730 - INFO - Epoch: 320/500, Iter: 12/119 -- train_loss: 1.1455 
2025-08-11 17:23:17,000 - INFO - Epoch: 320/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:26:41,654 - INFO - Epoch: 321/500, Iter: 1/119 -- train_loss: 1.1301 


[1/119]   1%|           [00:00<?]

2025-08-11 17:26:41,936 - INFO - Epoch: 321/500, Iter: 2/119 -- train_loss: 1.1761 
2025-08-11 17:26:42,208 - INFO - Epoch: 321/500, Iter: 3/119 -- train_loss: 1.1736 
2025-08-11 17:26:42,471 - INFO - Epoch: 321/500, Iter: 4/119 -- train_loss: 1.0816 
2025-08-11 17:26:42,739 - INFO - Epoch: 321/500, Iter: 5/119 -- train_loss: 1.1746 
2025-08-11 17:26:43,571 - INFO - Epoch: 321/500, Iter: 6/119 -- train_loss: 1.1609 
2025-08-11 17:26:43,824 - INFO - Epoch: 321/500, Iter: 7/119 -- train_loss: 1.0545 
2025-08-11 17:26:44,083 - INFO - Epoch: 321/500, Iter: 8/119 -- train_loss: 1.1111 
2025-08-11 17:26:45,395 - INFO - Epoch: 321/500, Iter: 9/119 -- train_loss: 1.0814 
2025-08-11 17:26:45,772 - INFO - Epoch: 321/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 17:26:48,026 - INFO - Epoch: 321/500, Iter: 11/119 -- train_loss: 1.1534 
2025-08-11 17:26:48,327 - INFO - Epoch: 321/500, Iter: 12/119 -- train_loss: 1.1738 
2025-08-11 17:26:48,601 - INFO - Epoch: 321/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:30:20,179 - INFO - Epoch: 322/500, Iter: 1/119 -- train_loss: 1.1424 


[1/119]   1%|           [00:00<?]

2025-08-11 17:30:24,926 - INFO - Epoch: 322/500, Iter: 2/119 -- train_loss: 1.1755 
2025-08-11 17:30:25,177 - INFO - Epoch: 322/500, Iter: 3/119 -- train_loss: 1.1576 
2025-08-11 17:30:25,435 - INFO - Epoch: 322/500, Iter: 4/119 -- train_loss: 1.1534 
2025-08-11 17:30:25,681 - INFO - Epoch: 322/500, Iter: 5/119 -- train_loss: 1.1271 
2025-08-11 17:30:26,002 - INFO - Epoch: 322/500, Iter: 6/119 -- train_loss: 0.9492 
2025-08-11 17:30:26,284 - INFO - Epoch: 322/500, Iter: 7/119 -- train_loss: 1.1461 
2025-08-11 17:30:26,571 - INFO - Epoch: 322/500, Iter: 8/119 -- train_loss: 1.1464 
2025-08-11 17:30:26,869 - INFO - Epoch: 322/500, Iter: 9/119 -- train_loss: 1.1751 
2025-08-11 17:30:33,993 - INFO - Epoch: 322/500, Iter: 10/119 -- train_loss: 1.1686 
2025-08-11 17:30:34,281 - INFO - Epoch: 322/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 17:30:34,578 - INFO - Epoch: 322/500, Iter: 12/119 -- train_loss: 1.1542 
2025-08-11 17:30:34,855 - INFO - Epoch: 322/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:34:16,302 - INFO - Epoch: 323/500, Iter: 1/119 -- train_loss: 1.1674 


[1/119]   1%|           [00:00<?]

2025-08-11 17:34:19,431 - INFO - Epoch: 323/500, Iter: 2/119 -- train_loss: 1.1521 
2025-08-11 17:34:19,726 - INFO - Epoch: 323/500, Iter: 3/119 -- train_loss: 1.1722 
2025-08-11 17:34:20,022 - INFO - Epoch: 323/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 17:34:20,308 - INFO - Epoch: 323/500, Iter: 5/119 -- train_loss: 1.1380 
2025-08-11 17:34:20,591 - INFO - Epoch: 323/500, Iter: 6/119 -- train_loss: 1.1622 
2025-08-11 17:34:20,893 - INFO - Epoch: 323/500, Iter: 7/119 -- train_loss: 1.1536 
2025-08-11 17:34:21,196 - INFO - Epoch: 323/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 17:34:21,487 - INFO - Epoch: 323/500, Iter: 9/119 -- train_loss: 1.1388 
2025-08-11 17:34:28,937 - INFO - Epoch: 323/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 17:34:29,205 - INFO - Epoch: 323/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 17:34:29,489 - INFO - Epoch: 323/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 17:34:29,762 - INFO - Epoch: 323/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:38:05,415 - INFO - Epoch: 324/500, Iter: 1/119 -- train_loss: 1.1618 


[1/119]   1%|           [00:00<?]

2025-08-11 17:38:05,759 - INFO - Epoch: 324/500, Iter: 2/119 -- train_loss: 1.1555 
2025-08-11 17:38:12,472 - INFO - Epoch: 324/500, Iter: 3/119 -- train_loss: 1.1608 
2025-08-11 17:38:12,812 - INFO - Epoch: 324/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 17:38:13,128 - INFO - Epoch: 324/500, Iter: 5/119 -- train_loss: 1.1742 
2025-08-11 17:38:13,474 - INFO - Epoch: 324/500, Iter: 6/119 -- train_loss: 1.1578 
2025-08-11 17:38:13,807 - INFO - Epoch: 324/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 17:38:14,155 - INFO - Epoch: 324/500, Iter: 8/119 -- train_loss: 1.1761 
2025-08-11 17:38:14,494 - INFO - Epoch: 324/500, Iter: 9/119 -- train_loss: 1.1745 
2025-08-11 17:38:17,050 - INFO - Epoch: 324/500, Iter: 10/119 -- train_loss: 1.1212 
2025-08-11 17:38:17,673 - INFO - Epoch: 324/500, Iter: 11/119 -- train_loss: 1.1737 
2025-08-11 17:38:18,033 - INFO - Epoch: 324/500, Iter: 12/119 -- train_loss: 1.1628 
2025-08-11 17:38:18,377 - INFO - Epoch: 324/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:42:37,365 - INFO - Epoch: 325/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 17:42:41,279 - INFO - Epoch: 325/500, Iter: 2/119 -- train_loss: 1.1704 
2025-08-11 17:42:47,992 - INFO - Epoch: 325/500, Iter: 3/119 -- train_loss: 1.1524 
2025-08-11 17:42:48,317 - INFO - Epoch: 325/500, Iter: 4/119 -- train_loss: 1.1750 
2025-08-11 17:42:48,638 - INFO - Epoch: 325/500, Iter: 5/119 -- train_loss: 1.1584 
2025-08-11 17:42:48,962 - INFO - Epoch: 325/500, Iter: 6/119 -- train_loss: 1.1765 
2025-08-11 17:42:49,298 - INFO - Epoch: 325/500, Iter: 7/119 -- train_loss: 1.1748 
2025-08-11 17:42:49,640 - INFO - Epoch: 325/500, Iter: 8/119 -- train_loss: 1.1744 
2025-08-11 17:42:50,014 - INFO - Epoch: 325/500, Iter: 9/119 -- train_loss: 1.1509 
2025-08-11 17:42:50,324 - INFO - Epoch: 325/500, Iter: 10/119 -- train_loss: 1.1791 
2025-08-11 17:42:56,804 - INFO - Epoch: 325/500, Iter: 11/119 -- train_loss: 1.1788 
2025-08-11 17:42:57,156 - INFO - Epoch: 325/500, Iter: 12/119 -- train_loss: 1.1759 
2025-08-11 17:42:57,432 - INFO - Epoch: 325/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:47:20,581 - INFO - Epoch: 326/500, Iter: 1/119 -- train_loss: 1.1745 


[1/119]   1%|           [00:00<?]

2025-08-11 17:47:21,086 - INFO - Epoch: 326/500, Iter: 2/119 -- train_loss: 1.1757 
2025-08-11 17:47:21,617 - INFO - Epoch: 326/500, Iter: 3/119 -- train_loss: 1.1728 
2025-08-11 17:47:22,113 - INFO - Epoch: 326/500, Iter: 4/119 -- train_loss: 1.1774 
2025-08-11 17:47:22,666 - INFO - Epoch: 326/500, Iter: 5/119 -- train_loss: 1.1727 
2025-08-11 17:47:29,627 - INFO - Epoch: 326/500, Iter: 6/119 -- train_loss: 1.1582 
2025-08-11 17:47:30,124 - INFO - Epoch: 326/500, Iter: 7/119 -- train_loss: 1.1709 
2025-08-11 17:47:30,664 - INFO - Epoch: 326/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 17:47:36,258 - INFO - Epoch: 326/500, Iter: 9/119 -- train_loss: 1.1683 
2025-08-11 17:47:36,599 - INFO - Epoch: 326/500, Iter: 10/119 -- train_loss: 1.1498 
2025-08-11 17:47:36,932 - INFO - Epoch: 326/500, Iter: 11/119 -- train_loss: 1.1097 
2025-08-11 17:47:39,979 - INFO - Epoch: 326/500, Iter: 12/119 -- train_loss: 1.1351 
2025-08-11 17:47:40,310 - INFO - Epoch: 326/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:52:02,062 - INFO - Epoch: 327/500, Iter: 1/119 -- train_loss: 1.1053 


[1/119]   1%|           [00:00<?]

2025-08-11 17:52:06,847 - INFO - Epoch: 327/500, Iter: 2/119 -- train_loss: 1.1189 
2025-08-11 17:52:13,489 - INFO - Epoch: 327/500, Iter: 3/119 -- train_loss: 1.1690 
2025-08-11 17:52:13,989 - INFO - Epoch: 327/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 17:52:14,433 - INFO - Epoch: 327/500, Iter: 5/119 -- train_loss: 1.1741 
2025-08-11 17:52:14,908 - INFO - Epoch: 327/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 17:52:15,411 - INFO - Epoch: 327/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 17:52:15,869 - INFO - Epoch: 327/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 17:52:23,881 - INFO - Epoch: 327/500, Iter: 9/119 -- train_loss: 1.1609 
2025-08-11 17:52:25,105 - INFO - Epoch: 327/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 17:52:34,225 - INFO - Epoch: 327/500, Iter: 11/119 -- train_loss: 1.1548 
2025-08-11 17:52:34,574 - INFO - Epoch: 327/500, Iter: 12/119 -- train_loss: 1.1736 
2025-08-11 17:52:34,982 - INFO - Epoch: 327/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 17:56:59,653 - INFO - Epoch: 328/500, Iter: 1/119 -- train_loss: 1.1747 


[1/119]   1%|           [00:00<?]

2025-08-11 17:57:00,234 - INFO - Epoch: 328/500, Iter: 2/119 -- train_loss: 1.1746 
2025-08-11 17:57:00,795 - INFO - Epoch: 328/500, Iter: 3/119 -- train_loss: 1.1748 
2025-08-11 17:57:10,375 - INFO - Epoch: 328/500, Iter: 4/119 -- train_loss: 1.1746 
2025-08-11 17:57:10,888 - INFO - Epoch: 328/500, Iter: 5/119 -- train_loss: 1.1479 
2025-08-11 17:57:11,473 - INFO - Epoch: 328/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 17:57:11,981 - INFO - Epoch: 328/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 17:57:12,675 - INFO - Epoch: 328/500, Iter: 8/119 -- train_loss: 1.0932 
2025-08-11 17:57:14,690 - INFO - Epoch: 328/500, Iter: 9/119 -- train_loss: 1.1350 
2025-08-11 17:57:15,250 - INFO - Epoch: 328/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 17:57:27,394 - INFO - Epoch: 328/500, Iter: 11/119 -- train_loss: 1.0104 
2025-08-11 17:57:27,938 - INFO - Epoch: 328/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 17:57:28,506 - INFO - Epoch: 328/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 18:01:51,647 - INFO - Epoch: 329/500, Iter: 1/119 -- train_loss: 1.1458 


[1/119]   1%|           [00:00<?]

2025-08-11 18:01:57,394 - INFO - Epoch: 329/500, Iter: 2/119 -- train_loss: 1.1644 
2025-08-11 18:01:57,718 - INFO - Epoch: 329/500, Iter: 3/119 -- train_loss: 1.1610 
2025-08-11 18:01:58,232 - INFO - Epoch: 329/500, Iter: 4/119 -- train_loss: 1.1743 
2025-08-11 18:01:58,587 - INFO - Epoch: 329/500, Iter: 5/119 -- train_loss: 1.1625 
2025-08-11 18:02:01,348 - INFO - Epoch: 329/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 18:02:01,703 - INFO - Epoch: 329/500, Iter: 7/119 -- train_loss: 1.1745 
2025-08-11 18:02:02,047 - INFO - Epoch: 329/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 18:02:05,473 - INFO - Epoch: 329/500, Iter: 9/119 -- train_loss: 1.0030 
2025-08-11 18:02:05,843 - INFO - Epoch: 329/500, Iter: 10/119 -- train_loss: 1.1711 
2025-08-11 18:02:06,184 - INFO - Epoch: 329/500, Iter: 11/119 -- train_loss: 1.1703 
2025-08-11 18:02:06,517 - INFO - Epoch: 329/500, Iter: 12/119 -- train_loss: 1.1742 
2025-08-11 18:02:06,884 - INFO - Epoch: 329/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 18:06:31,358 - INFO - Epoch: 330/500, Iter: 1/119 -- train_loss: 1.1683 


[1/119]   1%|           [00:00<?]

2025-08-11 18:06:31,671 - INFO - Epoch: 330/500, Iter: 2/119 -- train_loss: 1.0071 
2025-08-11 18:06:31,970 - INFO - Epoch: 330/500, Iter: 3/119 -- train_loss: 1.1603 
2025-08-11 18:06:33,682 - INFO - Epoch: 330/500, Iter: 4/119 -- train_loss: 1.1741 
2025-08-11 18:06:33,982 - INFO - Epoch: 330/500, Iter: 5/119 -- train_loss: 1.1688 
2025-08-11 18:06:34,270 - INFO - Epoch: 330/500, Iter: 6/119 -- train_loss: 1.1741 
2025-08-11 18:06:34,597 - INFO - Epoch: 330/500, Iter: 7/119 -- train_loss: 1.1741 
2025-08-11 18:06:34,889 - INFO - Epoch: 330/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 18:06:38,230 - INFO - Epoch: 330/500, Iter: 9/119 -- train_loss: 1.1741 
2025-08-11 18:06:40,820 - INFO - Epoch: 330/500, Iter: 10/119 -- train_loss: 1.1741 
2025-08-11 18:06:41,165 - INFO - Epoch: 330/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 18:06:41,487 - INFO - Epoch: 330/500, Iter: 12/119 -- train_loss: 1.1590 
2025-08-11 18:06:41,837 - INFO - Epoch: 330/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 18:10:38,702 - INFO - Epoch: 331/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 18:10:41,216 - INFO - Epoch: 331/500, Iter: 2/119 -- train_loss: 1.1742 
2025-08-11 18:10:42,262 - INFO - Epoch: 331/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 18:10:42,892 - INFO - Epoch: 331/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 18:10:43,382 - INFO - Epoch: 331/500, Iter: 5/119 -- train_loss: 1.1740 
2025-08-11 18:10:43,873 - INFO - Epoch: 331/500, Iter: 6/119 -- train_loss: 1.1742 
2025-08-11 18:10:44,307 - INFO - Epoch: 331/500, Iter: 7/119 -- train_loss: 1.1588 
2025-08-11 18:10:44,860 - INFO - Epoch: 331/500, Iter: 8/119 -- train_loss: 1.1437 
2025-08-11 18:10:52,045 - INFO - Epoch: 331/500, Iter: 9/119 -- train_loss: 1.1705 
2025-08-11 18:10:52,545 - INFO - Epoch: 331/500, Iter: 10/119 -- train_loss: 1.1380 
2025-08-11 18:10:53,070 - INFO - Epoch: 331/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 18:10:53,587 - INFO - Epoch: 331/500, Iter: 12/119 -- train_loss: 1.1665 
2025-08-11 18:10:54,051 - INFO - Epoch: 331/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 18:17:01,991 - INFO - Epoch: 332/500, Iter: 1/119 -- train_loss: 1.1742 


[1/119]   1%|           [00:00<?]

2025-08-11 18:17:02,577 - INFO - Epoch: 332/500, Iter: 2/119 -- train_loss: 1.1620 
2025-08-11 18:17:03,421 - INFO - Epoch: 332/500, Iter: 3/119 -- train_loss: 1.1742 
2025-08-11 18:17:03,910 - INFO - Epoch: 332/500, Iter: 4/119 -- train_loss: 1.1742 
2025-08-11 18:17:04,463 - INFO - Epoch: 332/500, Iter: 5/119 -- train_loss: 1.1484 
2025-08-11 18:17:04,986 - INFO - Epoch: 332/500, Iter: 6/119 -- train_loss: 1.1207 
2025-08-11 18:17:05,501 - INFO - Epoch: 332/500, Iter: 7/119 -- train_loss: 1.1742 
2025-08-11 18:17:06,106 - INFO - Epoch: 332/500, Iter: 8/119 -- train_loss: 1.1742 
2025-08-11 18:17:14,143 - INFO - Epoch: 332/500, Iter: 9/119 -- train_loss: 1.1449 
2025-08-11 18:17:14,497 - INFO - Epoch: 332/500, Iter: 10/119 -- train_loss: 1.1594 
2025-08-11 18:17:17,490 - INFO - Epoch: 332/500, Iter: 11/119 -- train_loss: 1.1742 
2025-08-11 18:17:17,828 - INFO - Epoch: 332/500, Iter: 12/119 -- train_loss: 1.1441 
2025-08-11 18:17:18,174 - INFO - Epoch: 332/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 18:21:33,424 - INFO - Epoch: 333/500, Iter: 1/119 -- train_loss: 1.1278 


[1/119]   1%|           [00:00<?]

2025-08-11 18:21:33,746 - INFO - Epoch: 333/500, Iter: 2/119 -- train_loss: 1.1678 
2025-08-11 18:21:34,024 - INFO - Epoch: 333/500, Iter: 3/119 -- train_loss: 1.1468 
2025-08-11 18:21:34,308 - INFO - Epoch: 333/500, Iter: 4/119 -- train_loss: 1.1549 
2025-08-11 18:21:34,604 - INFO - Epoch: 333/500, Iter: 5/119 -- train_loss: 1.1629 
2025-08-11 18:21:34,905 - INFO - Epoch: 333/500, Iter: 6/119 -- train_loss: 1.1743 
2025-08-11 18:21:37,749 - INFO - Epoch: 333/500, Iter: 7/119 -- train_loss: 1.1536 
2025-08-11 18:21:38,020 - INFO - Epoch: 333/500, Iter: 8/119 -- train_loss: 1.1743 
2025-08-11 18:21:48,182 - INFO - Epoch: 333/500, Iter: 9/119 -- train_loss: 0.9432 
2025-08-11 18:21:48,438 - INFO - Epoch: 333/500, Iter: 10/119 -- train_loss: 1.1751 
2025-08-11 18:21:48,692 - INFO - Epoch: 333/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 18:21:48,965 - INFO - Epoch: 333/500, Iter: 12/119 -- train_loss: 1.1615 
2025-08-11 18:21:49,239 - INFO - Epoch: 333/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

2025-08-11 18:25:19,368 - INFO - Epoch: 334/500, Iter: 1/119 -- train_loss: 1.1741 


[1/119]   1%|           [00:00<?]

2025-08-11 18:25:19,652 - INFO - Epoch: 334/500, Iter: 2/119 -- train_loss: 1.1741 
2025-08-11 18:25:21,437 - INFO - Epoch: 334/500, Iter: 3/119 -- train_loss: 1.1658 
2025-08-11 18:25:25,929 - INFO - Epoch: 334/500, Iter: 4/119 -- train_loss: 1.1761 
2025-08-11 18:25:26,226 - INFO - Epoch: 334/500, Iter: 5/119 -- train_loss: 1.1378 
2025-08-11 18:25:26,509 - INFO - Epoch: 334/500, Iter: 6/119 -- train_loss: 1.1709 
2025-08-11 18:25:26,768 - INFO - Epoch: 334/500, Iter: 7/119 -- train_loss: 1.1701 
2025-08-11 18:25:27,043 - INFO - Epoch: 334/500, Iter: 8/119 -- train_loss: 1.1741 
2025-08-11 18:25:27,356 - INFO - Epoch: 334/500, Iter: 9/119 -- train_loss: 1.1707 
2025-08-11 18:25:27,734 - INFO - Epoch: 334/500, Iter: 10/119 -- train_loss: 1.1637 
2025-08-11 18:25:28,046 - INFO - Epoch: 334/500, Iter: 11/119 -- train_loss: 1.1741 
2025-08-11 18:25:32,327 - INFO - Epoch: 334/500, Iter: 12/119 -- train_loss: 1.1741 
2025-08-11 18:25:32,641 - INFO - Epoch: 334/500, Iter: 13/119 -- train_lo

[1/20]   5%|5          [00:00<?]

Engine run is terminating due to exception: 


2025-08-11 18:28:18,962 - ERROR - Exception: 
Traceback (most recent call last):
  File "c:\Users\anson\OneDrive\Documents\p158-ap\p158-env\Lib\site-packages\ignite\engine\engine.py", line 1032, in _run_once_on_dataset_as_gen
    self.state.batch = next(self._dataloader_iter)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\anson\OneDrive\Documents\p158-ap\p158-env\Lib\site-packages\torch\utils\data\dataloader.py", line 733, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "c:\Users\anson\OneDrive\Documents\p158-ap\p158-env\Lib\site-packages\torch\utils\data\dataloader.py", line 1480, in _next_data
    raise StopIteration
StopIteration

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\anson\OneDrive\Documents\p158-ap\p158-env\Lib\site-packages\ignite\engine\engine.py", line 959, in _internal_run_as_gen
    epoch_time_taken += yield from self._run_once_on_dataset_as_gen

KeyboardInterrupt: 

In [None]:
import os
from prostate158.inference3 import inference_pipeline

# Run the inference pipeline on a single training case and write the
# predicted segmentation to disk.
#
# The case ID is defined once and reused for both the input folder and the
# output file name, so the prediction can never be labelled with the wrong case.
case_id = "051"
data_dir = "prostate158"  # Dataset base directory (hard-coded; keep in sync with the config)
case_dir = os.path.join(data_dir, "train", case_id)

# Input paths: the three image volumes passed to the pipeline
t2_path = os.path.join(case_dir, "t2.nii.gz")
adc_path = os.path.join(case_dir, "adc.nii.gz")
dwi_path = os.path.join(case_dir, "dwi.nii.gz")

# Output path: create the target folder up front so it exists before inference runs
output_dir = "predictions"
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f"case_{case_id}_tumor_pred.nii.gz")

# Run inference with the tumor configuration and its trained checkpoint
inference_pipeline(
    t2_path=t2_path,
    adc_path=adc_path,
    dwi_path=dwi_path,
    output_path=output_path,
    config_path="tumor.yaml",
    checkpoint_path="models/tumor.pt",
)

Finish the training with a final evaluation of the best model. To allow visualization of all outputs, add an `EpochOutputStore` handler first. Otherwise, only the output from the last epoch will be accessible. 

In [None]:
# Create an ignite EpochOutputStore and attach it to the trainer's evaluator
# under the name "output".
# NOTE(review): presumably this is what makes the collected outputs available
# as `trainer.evaluator.state.output`, which a later cell reads — confirm.
eos_handler = ignite.handlers.EpochOutputStore()
eos_handler.attach(trainer.evaluator, "output")

In [None]:
# Evaluate using a stored checkpoint.
# NOTE(review): "<model-name>" in the path below looks like a placeholder —
# replace it with the file name of the checkpoint to evaluate before running.
trainer.evaluate(checkpoint="models/<model-name>.pt")

Generate a markdown document with segmentation results

In [None]:
# Build the markdown report for this run.
# The configuration object in this notebook is `cfg` (loaded with
# `load_config` near the top); the name `config` is never assigned, so using
# it here would raise a NameError on a fresh kernel.
report_generator = ReportGenerator(cfg.run_id, cfg.out_dir, cfg.log_dir)
report_generator.generate_report()

Have a look at some outputs

In [None]:
# Grab everything the evaluator stored and regroup it per key.
output = trainer.evaluator.state.output
keys = ["image", "label", "pred"]

# For every key, take the first element of each stored entry, detach it from
# the graph, move it to the CPU and drop singleton dimensions.
outputs = {
    key: [
        entry[0][key].detach().cpu().squeeze()
        for entry in output
    ]
    for key in keys
}

In [None]:
# Show the first three images, followed by their labels and predictions.
# Labels and predictions are reduced with argmax over dim 0 before being
# re-oriented the same way as the images and cast to float.
ListViewer(
    [
        *(img.transpose(0, 2).flip(-2) for img in outputs["image"][:3]),
        *(
            lbl.argmax(0).transpose(0, 2).flip(-2).float()
            for lbl in outputs["label"][:3]
        ),
        *(
            prd.argmax(0).transpose(0, 2).flip(-2).float()
            for prd in outputs["pred"][:3]
        ),
    ]
).show()