In [2]:
### package
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime, timedelta

In [3]:
### count params
import operator
from functools import reduce

# Return (not print) the total number of scalar parameters in a model.
def count_params(model):
    """Count every scalar element across all parameters of ``model``.

    Args:
        model: Any object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        int: Sum of ``numel()`` over all parameters; 0 when there are none.
    """
    # numel() also covers 0-dim (scalar) parameters, whose size() is empty and
    # would make a reduce() without an initial value raise TypeError.
    return sum(p.numel() for p in model.parameters())

In [4]:
### read data function
def read_one_year(year, c_idx_list, timestep, datatype, 
                  height=157, width=103, keep_missing_hour=True):
    """Load one calendar year of per-timestamp grid files into stacked tensors.

    Walks from Jan 1 00:00 of ``year`` up to (but excluding) Jan 1 00:00 of
    ``year + 1`` in steps of ``timestep`` hours, loading one file per step.

    Args:
        year: Calendar year to load.
        c_idx_list: Channel indices selected from each stored tensor.
        timestep: Step between consecutive timestamps, in hours. Must be > 0.
        datatype: Dataset kind forwarded to ``construct_file_path``.
        height, width: Spatial size used for the NaN placeholder frames.
            Assumed to match the size of the stored grids; a mismatch makes
            the final ``torch.stack`` fail.
        keep_missing_hour: When True, a missing file contributes a NaN
            placeholder frame with mask 0; when False, the timestamp is
            skipped entirely.

    Returns:
        tuple: ``(data, mask)`` where ``data`` is ``[N, C, H, W]`` and ``mask``
        is ``[N]`` holding 1.0 for usable frames and 0.0 for missing or
        anomalous ones.

    Raises:
        ValueError: If ``timestep`` is not positive.
        RuntimeError: From ``torch.stack`` when no frame was collected at all
            (every file missing and ``keep_missing_hour`` is False).
    """
    # A zero or negative step would never advance past end_time (infinite loop).
    if timestep <= 0:
        raise ValueError(f"timestep must be a positive number of hours, got {timestep}")

    start_time = datetime(year, 1, 1, 0)
    end_time = datetime(year + 1, 1, 1, 0)
    delta = timedelta(hours=timestep)

    def nan_frame():
        # Placeholder for a timestamp whose data is unusable.
        return torch.full((len(c_idx_list), height, width), float('nan'))

    data_tensor = []  
    mask_tensor = []
    current_time = start_time
    while current_time < end_time:
        timestamp = current_time.strftime('%Y%m%d%H')
        path = construct_file_path(year, timestamp, datatype)   # full path of this timestamp's file
        grid_data = read_data_from_file(path, c_idx_list)   # None when the file does not exist

        if grid_data is None:
            if keep_missing_hour:
                data_tensor.append(nan_frame())
                mask_tensor.append(0.0)
        else:
            valid_values = grid_data[torch.isfinite(grid_data)]    # entries that are neither NaN nor +/-Inf
            if valid_values.numel() == 0:
                # Nothing usable in the frame: it must not be flagged as valid.
                data_tensor.append(nan_frame())
                mask_tensor.append(0.0)
                print(f"No finite values in file: {path}. Replacing with NaN tensor.")
            elif torch.all(valid_values == valid_values[0]):
                # Every finite value is identical: treated as anomalous data.
                data_tensor.append(nan_frame())
                mask_tensor.append(0.0)
                print(f"All valid values are the same in file: {path}. Replacing with NaN tensor.")
            else:
                data_tensor.append(grid_data)
                mask_tensor.append(1.0)

        current_time += delta

    return torch.stack(data_tensor), torch.tensor(mask_tensor)    # [total hours, C, H, W]


def construct_file_path(year, timestamp, mode,
                        base_dir="C:/Users/kevin/Documents/新 科技部計畫"):
    """Build the path of the ``.pt`` file for one timestamp.

    Args:
        year: Year used in the per-year folder name.
        timestamp: Timestamp string used in the file name (callers in this
            file pass ``%Y%m%d%H``).
        mode: ``"surfgrid"`` for gridded data or ``"obs"`` for observations.
        base_dir: Root directory holding the per-year folders. Defaults to
            the location this notebook was written against, so existing
            callers are unaffected.

    Returns:
        str: Path joined with forward slashes.

    Raises:
        ValueError: If ``mode`` is neither ``"surfgrid"`` nor ``"obs"``.
    """
    if mode == "surfgrid": 
        return f"{base_dir}/PT_grid_data_{year}(hour)/surfgrid_RCEC_{timestamp}.pt"
    elif mode == "obs":
        return f"{base_dir}/PT_observation_{year}(hour)/observation_{timestamp}.pt"
    else:
        raise ValueError("Invalid mode! Must be 'surfgrid' or 'obs'.")


def read_data_from_file(path, c_idx):
    """Load a saved tensor and keep only the requested leading-axis entries.

    Args:
        path: Location of the ``.pt`` file.
        c_idx: Index (or list of indices) applied to the first axis of the
            loaded tensor.

    Returns:
        The selected slice of the stored tensor, or ``None`` (after printing
        a warning) when ``path`` does not exist.
    """
    if not os.path.exists(path):
        print(f"⚠️ Missing file: {path}")
        return None

    stored = torch.load(path, weights_only=True)
    return stored[c_idx, ...]  # [C_selected, H, W]


def read_data(year_list: list,c_idx_list: list, timestep: int, 
              datatype: str, keep_missing_hour=True):
    """Load several years and concatenate them along the time axis.

    Each year is loaded with ``read_one_year`` (``year`` and ``timestep`` are
    cast to ``int`` first) and the per-year results are joined in the order
    given by ``year_list``.

    Returns:
        tuple: ``(data_tensor, mask_tensor)`` with shapes ``[T_total, C, H, W]``
        and ``[T_total]``; mask entries are 0 for missing or invalid frames.
    """
    yearly_results = [
        read_one_year(
            int(year), c_idx_list, int(timestep), datatype,
            keep_missing_hour=keep_missing_hour,
        )
        for year in year_list
    ]

    data_parts = [pair[0] for pair in yearly_results]
    mask_parts = [pair[1] for pair in yearly_results]   # 0 marks missing / invalid frames

    return torch.cat(data_parts), torch.cat(mask_parts)

In [5]:
### loading data tensor
# Training split: three years of hourly "surfgrid" frames.
# The same five channels are selected for both splits so channel positions
# line up between training and validation.
train_data_tensor, train_mask_tensor = read_data(
    year_list=[2020, 2021, 2022],
    c_idx_list=[3, 5, 6, 7, 8], # (PM25, windspeed, winddir, K, humidity%)
    timestep=1,
    datatype="surfgrid",
)  # data_tensor: [T_total, C, H, W], mask_tensor: [T_total]

# Validation split: one held-out year, same channels and time step.
val_data_tensor, val_mask_tensor = read_data(
    year_list=[2023],
    c_idx_list=[3, 5, 6, 7, 8],
    timestep=1,
    datatype="surfgrid",
)  # data_tensor: [T_total, C, H, W], mask_tensor: [T_total]

# Quick shape check of both splits.
print(train_data_tensor.shape)
print(val_data_tensor.shape)

⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010100.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010101.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010102.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010103.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010104.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010105.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010106.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010107.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid_RCEC_2020010108.pt
⚠️ Missing file: C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2020(hour)/surfgrid

In [6]:
### convert wind to u,v 
def convert_wind_to_uv(data_tensor: torch.Tensor, speed_idx: int, dir_idx: int) -> torch.Tensor:
    """Replace (speed, direction-in-degrees) channels with (u, v) components, in place.

    The channel at ``speed_idx`` is overwritten with ``u = -speed * sin(theta)``
    and the channel at ``dir_idx`` with ``v = -speed * cos(theta)``, where
    ``theta`` is the direction converted from degrees to radians.

    If any value in the direction channel is negative, the tensor is assumed
    to hold u/v already and is returned untouched.

    Args:
        data_tensor: Tensor indexed as ``[:, channel, ...]``; modified in place.
        speed_idx: Channel index of wind speed (receives ``u``).
        dir_idx: Channel index of wind direction (receives ``v``).

    Returns:
        The same ``data_tensor`` object that was passed in.
    """
    wind_speed = data_tensor[:, speed_idx]
    wind_dir = data_tensor[:, dir_idx]

    # A negative "direction" is taken as the sign that v is already stored here.
    if (wind_dir < 0).any():
        print("Skip: already converted to u/v.")
        return data_tensor

    angle = wind_dir * torch.pi / 180.0
    u_component = -wind_speed * angle.sin()
    v_component = -wind_speed * angle.cos()

    # Both components are fully computed above, so overwriting the source
    # channels here cannot corrupt the calculation.
    data_tensor[:, speed_idx] = u_component
    data_tensor[:, dir_idx] = v_component

    print("Wind converted to u/v.")
    return data_tensor

# Channel 1 is wind speed and channel 2 is wind direction within the selected
# channel list. convert_wind_to_uv writes into the tensor it receives and
# returns that same tensor, so the reassignment keeps the names unchanged.
train_data_tensor = convert_wind_to_uv(train_data_tensor, speed_idx=1, dir_idx=2)
val_data_tensor = convert_wind_to_uv(val_data_tensor, speed_idx=1, dir_idx=2)

Wind converted to u/v.
Wind converted to u/v.


In [7]:
class TimeSeriesWindowDataset(Dataset):
    """Sliding-window dataset over a ``[T_total, C, H, W]`` time series.

    Each sample covers ``T_in + T`` consecutive time steps. The input ``X``
    combines channel 0 of the first ``T_in`` steps with channels ``1:`` of the
    following ``T`` steps; the target ``y`` is channel 0 of those following
    ``T`` steps. Time and channel axes are flattened into one channel axis.
    Only windows whose mask entries are all non-zero are exposed.
    """

    # Grid file used for the lon/lat channels when no ``lonlat_path`` is given
    # (the location this class previously hard-coded).
    DEFAULT_LONLAT_PATH = "C:/Users/kevin/Documents/新 科技部計畫/PT_grid_data_2023(hour)/surfgrid_RCEC_2023010100.pt"

    def __init__(self, data_tensor: torch.Tensor, 
                 mask_tensor: torch.Tensor, 
                 T_in: int, T: int, 
                 stride: int = 1,
                 add_lonlat: bool = False,
                 lonlat_path: str = None):
        """Build the window index and optionally load normalized lon/lat grids.

        Args:
            data_tensor: Series of shape ``[T_total, C, H, W]``.
            mask_tensor: Per-time-step validity of shape ``[T_total]``
                (non-zero means usable).
            T_in: Number of input time steps.
            T: Number of target time steps. Must equal ``T_in`` (see Raises).
            stride: Step between candidate window start positions.
            add_lonlat: When True, two normalized coordinate channels are
                appended to every ``X``.
            lonlat_path: File the coordinate grids are read from; defaults to
                ``DEFAULT_LONLAT_PATH``. Only used when ``add_lonlat`` is True.

        Raises:
            ValueError: If ``T_in != T`` or if data and mask lengths differ.
        """
        # __getitem__ concatenates a [T_in, 1, H, W] slice with a
        # [T, C-1, H, W] slice along dim=1, which requires equal leading
        # sizes. Fail here with a clear message instead of inside torch.cat
        # on the first batch.
        if T_in != T:
            raise ValueError(
                f"T_in ({T_in}) must equal T ({T}): the input window and the "
                "condition window are concatenated along the channel axis."
            )
        # A mask shorter than the data would yield empty mask slices near the
        # end, and .all() of an empty slice is True (bogus valid windows).
        if mask_tensor.shape[0] != data_tensor.shape[0]:
            raise ValueError(
                f"data_tensor has {data_tensor.shape[0]} time steps but "
                f"mask_tensor has {mask_tensor.shape[0]}."
            )

        self.data = data_tensor
        self.mask = mask_tensor
        self.T_in = T_in
        self.T = T
        self.stride = stride
        self.window_size = T_in + T
        self.add_lonlat = add_lonlat

        if add_lonlat:
            source = lonlat_path if lonlat_path is not None else self.DEFAULT_LONLAT_PATH
            self.grid_lonlat = self._load_and_process_lonlat(source)
        else:
            self.grid_lonlat = None

        self.valid_indices = self._compute_valid_indices()

    def _load_and_process_lonlat(self, path: str) -> torch.Tensor:
        """Load coordinate grids from ``path`` and scale each to ``[-1, 1]``.

        Channel 0 of the stored tensor is read as latitude and channel 1 as
        longitude (assumed file layout; confirm against the data producer).

        Returns:
            Tensor ``[2, H, W]`` ordered as ``[lon, lat]``.
        """
        latlon_tensor = torch.load(path, weights_only=True)  # [C, H, W]
        lat = latlon_tensor[0]
        lon = latlon_tensor[1]

        def normalize(tensor):
            min_val = tensor.min()
            max_val = tensor.max()
            span = max_val - min_val
            if span == 0:
                # Constant grid: avoid 0/0 -> NaN and use the centre value.
                return torch.zeros_like(tensor)
            return 2 * (tensor - min_val) / span - 1

        lat = normalize(lat)
        lon = normalize(lon)
        return torch.stack([lon, lat], dim=0)  # [2, H, W]

    def _compute_valid_indices(self):
        """Return start positions whose whole window has a non-zero mask."""
        total_steps = self.data.shape[0]
        last_start = total_steps - self.window_size
        return [
            start
            for start in range(0, last_start + 1, self.stride)
            if self.mask[start: start + self.window_size].all()
        ]

    def __len__(self):
        """Number of fully valid windows."""
        return len(self.valid_indices)

    def __getitem__(self, idx):
        """Return ``(X, y)`` for the ``idx``-th valid window.

        ``X`` has ``T_in * C`` channels (plus 2 when lon/lat is enabled) and
        ``y`` has ``T`` channels, both with the original spatial size.
        """
        start_idx = self.valid_indices[idx]
        end_idx = start_idx + self.window_size
        window = self.data[start_idx:end_idx]

        X_pm25 = window[:self.T_in, :1]     # channel 0 of the input steps
        condition = window[self.T_in:, 1:]  # remaining channels of the target steps
        X = torch.cat([X_pm25, condition], dim=1)
        y = window[self.T_in:, :1]     # channel 0 of the target steps

        # Fold the time axis into the channel axis.
        T_in, C_in = X.shape[0], X.shape[1]
        X = X.reshape(T_in * C_in, *X.shape[2:])

        T_out, C_out = y.shape[0], y.shape[1]
        y = y.reshape(T_out * C_out, *y.shape[2:])

        if self.add_lonlat and self.grid_lonlat is not None:
            X = torch.cat([X, self.grid_lonlat], dim=0)

        return X, y

In [8]:
### build dataset/dataloader
# Window configuration: one input step, one target step, and a new candidate
# window every 2 time steps.
T_in = 1
T = 1
stride = 2

# Both datasets append normalized lon/lat channels to X (add_lonlat=True).
train_dataset = TimeSeriesWindowDataset(train_data_tensor, train_mask_tensor, T_in, T, stride, add_lonlat=True)
val_dataset = TimeSeriesWindowDataset(val_data_tensor, val_mask_tensor, T_in, T, stride, add_lonlat=True)

# Sample counts; n_train is reused when the scheduler length is computed.
n_train = len(train_dataset)
n_val = len(val_dataset)
print("訓練資料數量: ", n_train)
print("測試資料數量: ", n_val)

batch_size = 32
# drop_last=True discards the final partial batch of each loader.
# NOTE(review): this also applies to validation, so up to batch_size - 1
# validation samples are never evaluated -- confirm this is intended.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

訓練資料數量:  12771
測試資料數量:  4340


In [9]:
### check dataloader
def check_dataloader_nan_and_range(dataloader, name=""):
    """Print NaN presence and per-channel min/median/max for every batch.

    Iterates over the whole ``dataloader``. For each ``(X, y)`` batch it
    prints the tensor shape and whether it contains NaN; when it does not,
    it prints min, median and max of every channel (axis 1). Batches are
    indexed as ``(B, channels, H, W)``.

    Args:
        dataloader: Iterable yielding ``(batch_X, batch_y)`` tensor pairs.
        name: Label shown in the header line.
    """

    def report(label, batch, has_nan):
        # One shape line, then either per-channel statistics or a skip notice.
        print(f" - {label} shape: {batch.shape}, 含 NaN: {has_nan}")
        if has_nan:
            print(f" - {label} 數值範圍: (含 NaN，略過 per-channel 統計)")
            return
        for channel in range(batch.shape[1]):
            values = batch[:, channel, :, :].flatten()
            lowest = values.min().item()
            highest = values.max().item()
            middle = values.median().item()
            print(f"   - 通道 {channel}: min={lowest:.2f}, median={middle:.2f}, max={highest:.2f}")

    print(f"\n📦 檢查 DataLoader：{name}")
    for batch_no, (batch_X, batch_y) in enumerate(dataloader, start=1):
        # Evaluate both NaN flags before anything is printed for this batch.
        x_has_nan = torch.isnan(batch_X).any().item()
        y_has_nan = torch.isnan(batch_y).any().item()

        print(f"🔁 Batch {batch_no}:")
        
        report("X", batch_X, x_has_nan)
        report("y", batch_y, y_has_nan)

# Run the check on both loaders. Each call iterates over the entire loader
# and prints statistics for every batch.
check_dataloader_nan_and_range(train_loader, "Train")
check_dataloader_nan_and_range(val_loader, "Validation")


📦 檢查 DataLoader：Train
🔁 Batch 1:
 - X shape: torch.Size([32, 7, 157, 103]), 含 NaN: False
   - 通道 0: min=0.00, median=2.01, max=116.19
   - 通道 1: min=-14.52, median=-0.45, max=14.92
   - 通道 2: min=-20.59, median=-0.30, max=18.13
   - 通道 3: min=268.59, median=298.03, max=308.81
   - 通道 4: min=26.38, median=85.68, max=100.00
   - 通道 5: min=-1.00, median=-0.03, max=1.00
   - 通道 6: min=-1.00, median=-0.00, max=1.00
 - y shape: torch.Size([32, 1, 157, 103]), 含 NaN: False
   - 通道 0: min=0.00, median=1.97, max=117.84
🔁 Batch 2:
 - X shape: torch.Size([32, 7, 157, 103]), 含 NaN: False
   - 通道 0: min=0.00, median=2.36, max=139.83
   - 通道 1: min=-17.84, median=-1.94, max=15.80
   - 通道 2: min=-20.53, median=-2.45, max=12.41
   - 通道 3: min=269.36, median=295.86, max=309.01
   - 通道 4: min=12.70, median=83.95, max=100.00
   - 通道 5: min=-1.00, median=-0.03, max=1.00
   - 通道 6: min=-1.00, median=-0.00, max=1.00
 - y shape: torch.Size([32, 1, 157, 103]), 含 NaN: False
   - 通道 0: min=0.01, median=2.36, ma

In [22]:
### build model
from afnonet import AFNONet
from argparse import Namespace

# Hyper-parameters handed to AFNONet through a Namespace.
# N_in_channels=7 matches the 7-channel X batches reported by the dataloader
# check; N_out_channels=1 matches the single-channel y.
params = Namespace(
    patch_size=4,
    N_in_channels=7,
    N_out_channels=1,
    num_blocks=4
)

# NOTE(review): img_size=(160, 128) differs from the 157 x 103 data grid;
# presumably AFNONet pads/crops internally -- confirm in afnonet.py.
model = AFNONet(img_size=(160, 128), params=params, embed_dim=112, depth=3)
# Moves the model to the GPU; this cell requires a CUDA device.
model.cuda()

AFNONet(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(7, 112, kernel_size=(4, 4), stride=(4, 4))
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0-2): 3 x Block(
      (norm1): LayerNorm((112,), eps=1e-06, elementwise_affine=True)
      (filter): AFNO2D()
      (drop_path): Identity()
      (norm2): LayerNorm((112,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=112, out_features=448, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=448, out_features=112, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
  )
  (norm): LayerNorm((112,), eps=1e-06, elementwise_affine=True)
  (head): Linear(in_features=112, out_features=16, bias=False)
)

In [23]:
### model summary
from torchinfo import summary

# Layer-by-layer summary for a dummy batch of 2 samples with 7 channels on a
# 157 x 103 grid (the shape of X produced by the dataloaders).
print(summary(model, input_size=(2, 7, 157, 103)))

Layer (type:depth-idx)                   Output Shape              Param #
AFNONet                                  [2, 1, 157, 103]          143,584
├─PatchEmbed: 1-1                        [2, 1280, 112]            --
│    └─Conv2d: 2-1                       [2, 112, 40, 32]          12,656
├─Dropout: 1-2                           [2, 1280, 112]            --
├─ModuleList: 1-3                        --                        --
│    └─Block: 2-2                        [2, 40, 32, 112]          --
│    │    └─LayerNorm: 3-1               [2, 40, 32, 112]          224
│    │    └─AFNO2D: 3-2                  [2, 40, 32, 112]          12,992
│    │    └─LayerNorm: 3-3               [2, 40, 32, 112]          224
│    │    └─Mlp: 3-4                     [2, 40, 32, 112]          100,912
│    │    └─Identity: 3-5                [2, 40, 32, 112]          --
│    └─Block: 2-3                        [2, 40, 32, 112]          --
│    │    └─LayerNorm: 3-6               [2, 40, 32, 112]        

In [24]:
### build optimizer
learning_rate = 1e-3
epochs = 100
# Total number of optimizer steps: full batches per epoch times epochs.
# The floor division matches train_loader, which is built with drop_last=True.
iterations = epochs*(n_train//batch_size)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Cosine decay from learning_rate down to eta_min over `iterations` steps;
# the training loop calls scheduler.step() once per batch, not per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=iterations, eta_min=1e-4)

In [25]:
### record loss / best val loss
# Bookkeeping shared with the training cell.
# The two history lists are only reset here, so re-run this cell before
# restarting training; otherwise new epochs are appended to the old history.
# best_val_loss is also re-initialized at the top of the training cell.
best_val_loss = float('inf')
train_loss_history = []
val_loss_history = []

In [26]:
### Training
from timeit import default_timer as timer
from datetime import timedelta
from neuralop.losses import LpLoss

model_save_path = 'best_AFNO_1to1_cond.pth'
loss_csv_path = 'loss_history_AFNO_1to1_cond.csv'
device = "cuda"
myloss = nn.MSELoss()


def _save_loss_history(csv_path):
    """Write the per-epoch train/validation losses collected so far to CSV."""
    loss_df = pd.DataFrame({
        'Epoch': range(1, len(train_loss_history) + 1),
        'Train_Loss': train_loss_history,
        'Val_Loss': val_loss_history
    })
    loss_df.to_csv(csv_path, index=False)


# Start the wall-clock timer.
start_time = timer()

# Early-stopping settings.
patience = 20
early_stop_counter = 0
best_val_loss = float('inf')

for ep in range(epochs):
    # ========== Training phase ==========
    model.train()
    train_loss_accum = 0.0
    train_samples = 0
    
    for x, y in tqdm(train_loader, desc=f"Training Epoch {ep+1}/{epochs}"):
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()

        pred = model(x)
        # The network output is added to channel 0 of the input (PM2.5 at the
        # input step), i.e. the model learns the change, not the value itself.
        T0_pm25 = x[:, :1]
        loss = myloss(pred+T0_pm25, y)
        
        loss.backward()
        optimizer.step()
        scheduler.step()    # scheduler advances once per batch
        
        # Weight by batch size so the epoch figure is a per-sample mean.
        train_loss_accum += loss.item() * x.size(0)
        train_samples += x.size(0)
    
    train_loss = train_loss_accum / train_samples
    train_loss_history.append(train_loss)
    print(f"Train Loss: {train_loss:.6f}")
    
    # ========== Validation phase ==========
    model.eval()
    val_loss_accum = 0.0
    val_samples = 0
    
    with torch.no_grad():
        for x, y in tqdm(val_loader, desc=f"Validation Epoch {ep+1}/{epochs}"):
            x = x.to(device)
            y = y.to(device)

            pred = model(x)
            T0_pm25 = x[:, :1]
            loss = myloss(pred+T0_pm25, y)
            
            val_loss_accum += loss.item() * x.size(0)
            val_samples += x.size(0)
    
    val_loss = val_loss_accum / val_samples
    val_loss_history.append(val_loss)
    print(f"Valid Loss: {val_loss:.6f}")
    
    # Keep the checkpoint with the lowest validation loss seen so far.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0  # reset the patience counter
        torch.save(model.state_dict(), model_save_path)
        print(f"💾 Best model saved (val_loss={val_loss:.6f})")
    else:
        early_stop_counter += 1
        print(f"⚠️ No improvement. Early stop counter: {early_stop_counter}/{patience}")
    
    # Periodic snapshot of the loss history every 10 epochs.
    if (ep + 1) % 10 == 0:
        _save_loss_history(loss_csv_path)
        print(f"📊 Save loss history (up to epoch {ep+1})")
    
    # Stop once validation has not improved for `patience` epochs in a row.
    if early_stop_counter >= patience:
        print(f"🛑 Early stopping triggered at epoch {ep+1}.")
        break
    
    # End of one epoch.
    print('-'*50)

# Always write the final history: the periodic snapshot alone would drop the
# epochs after the last multiple of 10 (e.g. when early stopping triggers).
_save_loss_history(loss_csv_path)

# Stop the timer.
end_time = timer()
elapsed_time = end_time - start_time
formatted_time = str(timedelta(seconds=int(elapsed_time)))
print(f"⏱️ Total training time: {formatted_time}")

# Final report.
print(f"🎯 Best validation loss: {best_val_loss:.6f}")
print(f"📈 Total epochs completed: {len(train_loss_history)}")
print(f"💾 Model saved to: {model_save_path}")
print(f"📊 Loss history saved to: {loss_csv_path}")

Training Epoch 1/100: 100%|██████████| 399/399 [00:29<00:00, 13.62it/s]


Train Loss: 5.484836


Validation Epoch 1/100: 100%|██████████| 135/135 [00:02<00:00, 62.73it/s]


Valid Loss: 6.263143
💾 Best model saved (val_loss=6.263143)
--------------------------------------------------


Training Epoch 2/100: 100%|██████████| 399/399 [00:29<00:00, 13.51it/s]


Train Loss: 4.121596


Validation Epoch 2/100: 100%|██████████| 135/135 [00:02<00:00, 62.93it/s]


Valid Loss: 6.038059
💾 Best model saved (val_loss=6.038059)
--------------------------------------------------


Training Epoch 3/100: 100%|██████████| 399/399 [00:28<00:00, 13.99it/s]


Train Loss: 4.005520


Validation Epoch 3/100: 100%|██████████| 135/135 [00:02<00:00, 66.00it/s]


Valid Loss: 6.011973
💾 Best model saved (val_loss=6.011973)
--------------------------------------------------


Training Epoch 4/100: 100%|██████████| 399/399 [00:28<00:00, 13.78it/s]


Train Loss: 3.946885


Validation Epoch 4/100: 100%|██████████| 135/135 [00:02<00:00, 64.67it/s]


Valid Loss: 5.838681
💾 Best model saved (val_loss=5.838681)
--------------------------------------------------


Training Epoch 5/100: 100%|██████████| 399/399 [00:28<00:00, 13.98it/s]


Train Loss: 3.897162


Validation Epoch 5/100: 100%|██████████| 135/135 [00:02<00:00, 63.82it/s]


Valid Loss: 5.812569
💾 Best model saved (val_loss=5.812569)
--------------------------------------------------


Training Epoch 6/100: 100%|██████████| 399/399 [00:28<00:00, 13.87it/s]


Train Loss: 3.814538


Validation Epoch 6/100: 100%|██████████| 135/135 [00:02<00:00, 64.88it/s]


Valid Loss: 5.728414
💾 Best model saved (val_loss=5.728414)
--------------------------------------------------


Training Epoch 7/100: 100%|██████████| 399/399 [00:28<00:00, 13.89it/s]


Train Loss: 3.619106


Validation Epoch 7/100: 100%|██████████| 135/135 [00:02<00:00, 65.78it/s]


Valid Loss: 5.090331
💾 Best model saved (val_loss=5.090331)
--------------------------------------------------


Training Epoch 8/100: 100%|██████████| 399/399 [00:28<00:00, 14.11it/s]


Train Loss: 3.094079


Validation Epoch 8/100: 100%|██████████| 135/135 [00:02<00:00, 65.58it/s]


Valid Loss: 4.305337
💾 Best model saved (val_loss=4.305337)
--------------------------------------------------


Training Epoch 9/100: 100%|██████████| 399/399 [00:28<00:00, 13.77it/s]


Train Loss: 2.749212


Validation Epoch 9/100: 100%|██████████| 135/135 [00:01<00:00, 67.68it/s]


Valid Loss: 4.195003
💾 Best model saved (val_loss=4.195003)
--------------------------------------------------


Training Epoch 10/100: 100%|██████████| 399/399 [00:28<00:00, 13.83it/s]


Train Loss: 2.551905


Validation Epoch 10/100: 100%|██████████| 135/135 [00:02<00:00, 62.90it/s]


Valid Loss: 4.363834
⚠️ No improvement. Early stop counter: 1/20
📊 Save loss history (up to epoch 10)
--------------------------------------------------


Training Epoch 11/100: 100%|██████████| 399/399 [00:28<00:00, 13.89it/s]


Train Loss: 2.403708


Validation Epoch 11/100: 100%|██████████| 135/135 [00:02<00:00, 66.48it/s]


Valid Loss: 3.582531
💾 Best model saved (val_loss=3.582531)
--------------------------------------------------


Training Epoch 12/100: 100%|██████████| 399/399 [00:28<00:00, 14.15it/s]


Train Loss: 2.309468


Validation Epoch 12/100: 100%|██████████| 135/135 [00:02<00:00, 65.56it/s]


Valid Loss: 3.512011
💾 Best model saved (val_loss=3.512011)
--------------------------------------------------


Training Epoch 13/100: 100%|██████████| 399/399 [00:28<00:00, 14.09it/s]


Train Loss: 2.245223


Validation Epoch 13/100: 100%|██████████| 135/135 [00:02<00:00, 66.70it/s]


Valid Loss: 3.505425
💾 Best model saved (val_loss=3.505425)
--------------------------------------------------


Training Epoch 14/100: 100%|██████████| 399/399 [00:28<00:00, 13.85it/s]


Train Loss: 2.179045


Validation Epoch 14/100: 100%|██████████| 135/135 [00:02<00:00, 65.57it/s]


Valid Loss: 3.364529
💾 Best model saved (val_loss=3.364529)
--------------------------------------------------


Training Epoch 15/100: 100%|██████████| 399/399 [00:28<00:00, 13.81it/s]


Train Loss: 2.104426


Validation Epoch 15/100: 100%|██████████| 135/135 [00:02<00:00, 64.46it/s]


Valid Loss: 3.447050
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 16/100: 100%|██████████| 399/399 [00:28<00:00, 13.94it/s]


Train Loss: 2.076734


Validation Epoch 16/100: 100%|██████████| 135/135 [00:01<00:00, 68.59it/s]


Valid Loss: 3.159720
💾 Best model saved (val_loss=3.159720)
--------------------------------------------------


Training Epoch 17/100: 100%|██████████| 399/399 [00:28<00:00, 13.92it/s]


Train Loss: 2.028943


Validation Epoch 17/100: 100%|██████████| 135/135 [00:02<00:00, 64.19it/s]


Valid Loss: 3.142763
💾 Best model saved (val_loss=3.142763)
--------------------------------------------------


Training Epoch 18/100: 100%|██████████| 399/399 [00:28<00:00, 13.87it/s]


Train Loss: 1.997617


Validation Epoch 18/100: 100%|██████████| 135/135 [00:02<00:00, 65.66it/s]


Valid Loss: 3.100356
💾 Best model saved (val_loss=3.100356)
--------------------------------------------------


Training Epoch 19/100: 100%|██████████| 399/399 [00:28<00:00, 13.93it/s]


Train Loss: 1.968889


Validation Epoch 19/100: 100%|██████████| 135/135 [00:02<00:00, 65.72it/s]


Valid Loss: 3.243775
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 20/100: 100%|██████████| 399/399 [00:28<00:00, 13.99it/s]


Train Loss: 1.936530


Validation Epoch 20/100: 100%|██████████| 135/135 [00:02<00:00, 66.08it/s]


Valid Loss: 3.056731
💾 Best model saved (val_loss=3.056731)
📊 Save loss history (up to epoch 20)
--------------------------------------------------


Training Epoch 21/100: 100%|██████████| 399/399 [00:28<00:00, 13.91it/s]


Train Loss: 1.900400


Validation Epoch 21/100: 100%|██████████| 135/135 [00:02<00:00, 62.94it/s]


Valid Loss: 3.036365
💾 Best model saved (val_loss=3.036365)
--------------------------------------------------


Training Epoch 22/100: 100%|██████████| 399/399 [00:28<00:00, 14.07it/s]


Train Loss: 1.886281


Validation Epoch 22/100: 100%|██████████| 135/135 [00:02<00:00, 65.04it/s]


Valid Loss: 3.002008
💾 Best model saved (val_loss=3.002008)
--------------------------------------------------


Training Epoch 23/100: 100%|██████████| 399/399 [00:28<00:00, 14.08it/s]


Train Loss: 1.847006


Validation Epoch 23/100: 100%|██████████| 135/135 [00:02<00:00, 65.04it/s]


Valid Loss: 3.223589
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 24/100: 100%|██████████| 399/399 [00:28<00:00, 14.07it/s]


Train Loss: 1.843013


Validation Epoch 24/100: 100%|██████████| 135/135 [00:02<00:00, 65.82it/s]


Valid Loss: 2.852515
💾 Best model saved (val_loss=2.852515)
--------------------------------------------------


Training Epoch 25/100: 100%|██████████| 399/399 [00:28<00:00, 14.02it/s]


Train Loss: 1.818191


Validation Epoch 25/100: 100%|██████████| 135/135 [00:02<00:00, 65.87it/s]


Valid Loss: 3.090631
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 26/100: 100%|██████████| 399/399 [00:28<00:00, 14.07it/s]


Train Loss: 1.776673


Validation Epoch 26/100: 100%|██████████| 135/135 [00:02<00:00, 65.01it/s]


Valid Loss: 2.923478
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 27/100: 100%|██████████| 399/399 [00:28<00:00, 14.08it/s]


Train Loss: 1.756316


Validation Epoch 27/100: 100%|██████████| 135/135 [00:01<00:00, 67.51it/s]


Valid Loss: 2.930599
⚠️ No improvement. Early stop counter: 3/20
--------------------------------------------------


Training Epoch 28/100: 100%|██████████| 399/399 [00:27<00:00, 14.28it/s]


Train Loss: 1.755636


Validation Epoch 28/100: 100%|██████████| 135/135 [00:02<00:00, 66.08it/s]


Valid Loss: 2.803841
💾 Best model saved (val_loss=2.803841)
--------------------------------------------------


Training Epoch 29/100: 100%|██████████| 399/399 [00:28<00:00, 14.01it/s]


Train Loss: 1.723149


Validation Epoch 29/100: 100%|██████████| 135/135 [00:02<00:00, 66.75it/s]


Valid Loss: 2.843535
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 30/100: 100%|██████████| 399/399 [00:28<00:00, 14.08it/s]


Train Loss: 1.707572


Validation Epoch 30/100: 100%|██████████| 135/135 [00:02<00:00, 65.94it/s]


Valid Loss: 2.816390
⚠️ No improvement. Early stop counter: 2/20
📊 Save loss history (up to epoch 30)
--------------------------------------------------


Training Epoch 31/100: 100%|██████████| 399/399 [00:28<00:00, 14.11it/s]


Train Loss: 1.695500


Validation Epoch 31/100: 100%|██████████| 135/135 [00:02<00:00, 66.97it/s]


Valid Loss: 2.759763
💾 Best model saved (val_loss=2.759763)
--------------------------------------------------


Training Epoch 32/100: 100%|██████████| 399/399 [00:28<00:00, 14.11it/s]


Train Loss: 1.664419


Validation Epoch 32/100: 100%|██████████| 135/135 [00:02<00:00, 65.16it/s]


Valid Loss: 3.035726
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 33/100: 100%|██████████| 399/399 [00:28<00:00, 14.05it/s]


Train Loss: 1.690488


Validation Epoch 33/100: 100%|██████████| 135/135 [00:02<00:00, 65.02it/s]


Valid Loss: 2.704659
💾 Best model saved (val_loss=2.704659)
--------------------------------------------------


Training Epoch 34/100: 100%|██████████| 399/399 [00:28<00:00, 14.05it/s]


Train Loss: 1.704458


Validation Epoch 34/100: 100%|██████████| 135/135 [00:02<00:00, 65.22it/s]


Valid Loss: 2.735863
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 35/100: 100%|██████████| 399/399 [00:28<00:00, 14.10it/s]


Train Loss: 1.629453


Validation Epoch 35/100: 100%|██████████| 135/135 [00:02<00:00, 65.28it/s]


Valid Loss: 2.719004
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 36/100: 100%|██████████| 399/399 [00:28<00:00, 14.03it/s]


Train Loss: 1.620269


Validation Epoch 36/100: 100%|██████████| 135/135 [00:02<00:00, 65.68it/s]


Valid Loss: 2.713394
⚠️ No improvement. Early stop counter: 3/20
--------------------------------------------------


Training Epoch 37/100: 100%|██████████| 399/399 [00:28<00:00, 13.83it/s]


Train Loss: 1.622786


Validation Epoch 37/100: 100%|██████████| 135/135 [00:02<00:00, 64.90it/s]


Valid Loss: 2.770966
⚠️ No improvement. Early stop counter: 4/20
--------------------------------------------------


Training Epoch 38/100: 100%|██████████| 399/399 [00:28<00:00, 13.94it/s]


Train Loss: 1.614187


Validation Epoch 38/100: 100%|██████████| 135/135 [00:02<00:00, 65.84it/s]


Valid Loss: 2.691629
💾 Best model saved (val_loss=2.691629)
--------------------------------------------------


Training Epoch 39/100: 100%|██████████| 399/399 [00:28<00:00, 13.95it/s]


Train Loss: 1.595040


Validation Epoch 39/100: 100%|██████████| 135/135 [00:02<00:00, 65.87it/s]


Valid Loss: 2.700418
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 40/100: 100%|██████████| 399/399 [00:28<00:00, 14.02it/s]


Train Loss: 1.552694


Validation Epoch 40/100: 100%|██████████| 135/135 [00:02<00:00, 65.09it/s]


Valid Loss: 2.720272
⚠️ No improvement. Early stop counter: 2/20
📊 Save loss history (up to epoch 40)
--------------------------------------------------


Training Epoch 41/100: 100%|██████████| 399/399 [00:28<00:00, 13.94it/s]


Train Loss: 1.555081


Validation Epoch 41/100: 100%|██████████| 135/135 [00:02<00:00, 65.59it/s]


Valid Loss: 2.655214
💾 Best model saved (val_loss=2.655214)
--------------------------------------------------


Training Epoch 42/100: 100%|██████████| 399/399 [00:28<00:00, 14.15it/s]


Train Loss: 1.541760


Validation Epoch 42/100: 100%|██████████| 135/135 [00:02<00:00, 66.84it/s]


Valid Loss: 2.688011
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 43/100: 100%|██████████| 399/399 [00:28<00:00, 14.12it/s]


Train Loss: 1.553930


Validation Epoch 43/100: 100%|██████████| 135/135 [00:02<00:00, 65.06it/s]


Valid Loss: 2.747834
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 44/100: 100%|██████████| 399/399 [00:28<00:00, 14.13it/s]


Train Loss: 1.545346


Validation Epoch 44/100: 100%|██████████| 135/135 [00:02<00:00, 64.64it/s]


Valid Loss: 2.653466
💾 Best model saved (val_loss=2.653466)
--------------------------------------------------


Training Epoch 45/100: 100%|██████████| 399/399 [00:28<00:00, 13.97it/s]


Train Loss: 1.512719


Validation Epoch 45/100: 100%|██████████| 135/135 [00:02<00:00, 67.29it/s]


Valid Loss: 2.753678
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 46/100: 100%|██████████| 399/399 [00:28<00:00, 13.96it/s]


Train Loss: 1.503915


Validation Epoch 46/100: 100%|██████████| 135/135 [00:02<00:00, 66.99it/s]


Valid Loss: 2.785365
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 47/100: 100%|██████████| 399/399 [00:28<00:00, 13.84it/s]


Train Loss: 1.489558


Validation Epoch 47/100: 100%|██████████| 135/135 [00:02<00:00, 66.54it/s]


Valid Loss: 2.582083
💾 Best model saved (val_loss=2.582083)
--------------------------------------------------


Training Epoch 48/100: 100%|██████████| 399/399 [00:28<00:00, 13.88it/s]


Train Loss: 1.477921


Validation Epoch 48/100: 100%|██████████| 135/135 [00:02<00:00, 66.76it/s]


Valid Loss: 2.595099
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 49/100: 100%|██████████| 399/399 [00:28<00:00, 14.04it/s]


Train Loss: 1.454786


Validation Epoch 49/100: 100%|██████████| 135/135 [00:02<00:00, 65.55it/s]


Valid Loss: 2.706171
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 50/100: 100%|██████████| 399/399 [00:29<00:00, 13.75it/s]


Train Loss: 1.465216


Validation Epoch 50/100: 100%|██████████| 135/135 [00:02<00:00, 65.90it/s]


Valid Loss: 2.576586
💾 Best model saved (val_loss=2.576586)
📊 Save loss history (up to epoch 50)
--------------------------------------------------


Training Epoch 51/100: 100%|██████████| 399/399 [00:28<00:00, 13.90it/s]


Train Loss: 1.456713


Validation Epoch 51/100: 100%|██████████| 135/135 [00:02<00:00, 65.40it/s]


Valid Loss: 2.556043
💾 Best model saved (val_loss=2.556043)
--------------------------------------------------


Training Epoch 52/100: 100%|██████████| 399/399 [00:28<00:00, 14.11it/s]


Train Loss: 1.437321


Validation Epoch 52/100: 100%|██████████| 135/135 [00:02<00:00, 66.35it/s]


Valid Loss: 2.632052
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 53/100: 100%|██████████| 399/399 [00:28<00:00, 14.14it/s]


Train Loss: 1.470836


Validation Epoch 53/100: 100%|██████████| 135/135 [00:02<00:00, 67.14it/s]


Valid Loss: 2.573021
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 54/100: 100%|██████████| 399/399 [00:28<00:00, 14.09it/s]


Train Loss: 1.425105


Validation Epoch 54/100: 100%|██████████| 135/135 [00:02<00:00, 66.57it/s]


Valid Loss: 2.570709
⚠️ No improvement. Early stop counter: 3/20
--------------------------------------------------


Training Epoch 55/100: 100%|██████████| 399/399 [00:28<00:00, 14.02it/s]


Train Loss: 1.422565


Validation Epoch 55/100: 100%|██████████| 135/135 [00:02<00:00, 65.91it/s]


Valid Loss: 2.595620
⚠️ No improvement. Early stop counter: 4/20
--------------------------------------------------


Training Epoch 56/100: 100%|██████████| 399/399 [00:28<00:00, 14.06it/s]


Train Loss: 1.410178


Validation Epoch 56/100: 100%|██████████| 135/135 [00:02<00:00, 66.76it/s]


Valid Loss: 2.547827
💾 Best model saved (val_loss=2.547827)
--------------------------------------------------


Training Epoch 57/100: 100%|██████████| 399/399 [00:28<00:00, 13.96it/s]


Train Loss: 1.391467


Validation Epoch 57/100: 100%|██████████| 135/135 [00:02<00:00, 65.11it/s]


Valid Loss: 2.558382
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 58/100: 100%|██████████| 399/399 [00:28<00:00, 14.02it/s]


Train Loss: 1.413986


Validation Epoch 58/100: 100%|██████████| 135/135 [00:02<00:00, 65.74it/s]


Valid Loss: 2.574637
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 59/100: 100%|██████████| 399/399 [00:28<00:00, 14.06it/s]


Train Loss: 1.384959


Validation Epoch 59/100: 100%|██████████| 135/135 [00:02<00:00, 66.73it/s]


Valid Loss: 2.531780
💾 Best model saved (val_loss=2.531780)
--------------------------------------------------


Training Epoch 60/100: 100%|██████████| 399/399 [00:28<00:00, 14.06it/s]


Train Loss: 1.367250


Validation Epoch 60/100: 100%|██████████| 135/135 [00:02<00:00, 65.41it/s]


Valid Loss: 2.511982
💾 Best model saved (val_loss=2.511982)
📊 Save loss history (up to epoch 60)
--------------------------------------------------


Training Epoch 61/100: 100%|██████████| 399/399 [00:28<00:00, 13.87it/s]


Train Loss: 1.360053


Validation Epoch 61/100: 100%|██████████| 135/135 [00:02<00:00, 65.39it/s]


Valid Loss: 2.540287
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 62/100: 100%|██████████| 399/399 [00:28<00:00, 14.13it/s]


Train Loss: 1.347390


Validation Epoch 62/100: 100%|██████████| 135/135 [00:02<00:00, 66.07it/s]


Valid Loss: 2.510550
💾 Best model saved (val_loss=2.510550)
--------------------------------------------------


Training Epoch 63/100: 100%|██████████| 399/399 [00:28<00:00, 14.19it/s]


Train Loss: 1.342854


Validation Epoch 63/100: 100%|██████████| 135/135 [00:01<00:00, 67.69it/s]


Valid Loss: 2.489669
💾 Best model saved (val_loss=2.489669)
--------------------------------------------------


Training Epoch 64/100: 100%|██████████| 399/399 [00:28<00:00, 14.16it/s]


Train Loss: 1.378202


Validation Epoch 64/100: 100%|██████████| 135/135 [00:02<00:00, 63.91it/s]


Valid Loss: 2.477049
💾 Best model saved (val_loss=2.477049)
--------------------------------------------------


Training Epoch 65/100: 100%|██████████| 399/399 [00:28<00:00, 13.83it/s]


Train Loss: 1.345256


Validation Epoch 65/100: 100%|██████████| 135/135 [00:02<00:00, 66.38it/s]


Valid Loss: 2.528663
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 66/100: 100%|██████████| 399/399 [00:28<00:00, 13.94it/s]


Train Loss: 1.328009


Validation Epoch 66/100: 100%|██████████| 135/135 [00:02<00:00, 65.68it/s]


Valid Loss: 2.532711
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 67/100: 100%|██████████| 399/399 [00:28<00:00, 14.19it/s]


Train Loss: 1.327549


Validation Epoch 67/100: 100%|██████████| 135/135 [00:02<00:00, 67.50it/s]


Valid Loss: 2.484711
⚠️ No improvement. Early stop counter: 3/20
--------------------------------------------------


Training Epoch 68/100: 100%|██████████| 399/399 [00:28<00:00, 14.15it/s]


Train Loss: 1.329483


Validation Epoch 68/100: 100%|██████████| 135/135 [00:02<00:00, 66.58it/s]


Valid Loss: 2.483559
⚠️ No improvement. Early stop counter: 4/20
--------------------------------------------------


Training Epoch 69/100: 100%|██████████| 399/399 [00:28<00:00, 14.12it/s]


Train Loss: 1.310824


Validation Epoch 69/100: 100%|██████████| 135/135 [00:02<00:00, 67.05it/s]


Valid Loss: 2.502655
⚠️ No improvement. Early stop counter: 5/20
--------------------------------------------------


Training Epoch 70/100: 100%|██████████| 399/399 [00:28<00:00, 13.95it/s]


Train Loss: 1.301381


Validation Epoch 70/100: 100%|██████████| 135/135 [00:02<00:00, 66.47it/s]


Valid Loss: 2.485158
⚠️ No improvement. Early stop counter: 6/20
📊 Save loss history (up to epoch 70)
--------------------------------------------------


Training Epoch 71/100: 100%|██████████| 399/399 [00:28<00:00, 13.98it/s]


Train Loss: 1.297857


Validation Epoch 71/100: 100%|██████████| 135/135 [00:02<00:00, 66.00it/s]


Valid Loss: 2.481657
⚠️ No improvement. Early stop counter: 7/20
--------------------------------------------------


Training Epoch 72/100: 100%|██████████| 399/399 [00:28<00:00, 14.14it/s]


Train Loss: 1.290220


Validation Epoch 72/100: 100%|██████████| 135/135 [00:02<00:00, 66.01it/s]


Valid Loss: 2.467009
💾 Best model saved (val_loss=2.467009)
--------------------------------------------------


Training Epoch 73/100: 100%|██████████| 399/399 [00:28<00:00, 13.96it/s]


Train Loss: 1.301865


Validation Epoch 73/100: 100%|██████████| 135/135 [00:02<00:00, 64.89it/s]


Valid Loss: 2.447452
💾 Best model saved (val_loss=2.447452)
--------------------------------------------------


Training Epoch 74/100: 100%|██████████| 399/399 [00:28<00:00, 13.86it/s]


Train Loss: 1.279417


Validation Epoch 74/100: 100%|██████████| 135/135 [00:02<00:00, 65.33it/s]


Valid Loss: 2.478705
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 75/100: 100%|██████████| 399/399 [00:28<00:00, 14.00it/s]


Train Loss: 1.274791


Validation Epoch 75/100: 100%|██████████| 135/135 [00:01<00:00, 67.69it/s]


Valid Loss: 2.461852
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 76/100: 100%|██████████| 399/399 [00:28<00:00, 13.95it/s]


Train Loss: 1.269030


Validation Epoch 76/100: 100%|██████████| 135/135 [00:02<00:00, 63.77it/s]


Valid Loss: 2.458732
⚠️ No improvement. Early stop counter: 3/20
--------------------------------------------------


Training Epoch 77/100: 100%|██████████| 399/399 [00:28<00:00, 13.88it/s]


Train Loss: 1.266837


Validation Epoch 77/100: 100%|██████████| 135/135 [00:02<00:00, 66.07it/s]


Valid Loss: 2.441370
💾 Best model saved (val_loss=2.441370)
--------------------------------------------------


Training Epoch 78/100: 100%|██████████| 399/399 [00:28<00:00, 14.02it/s]


Train Loss: 1.261231


Validation Epoch 78/100: 100%|██████████| 135/135 [00:02<00:00, 65.89it/s]


Valid Loss: 2.431064
💾 Best model saved (val_loss=2.431064)
--------------------------------------------------


Training Epoch 79/100: 100%|██████████| 399/399 [00:28<00:00, 14.03it/s]


Train Loss: 1.259338


Validation Epoch 79/100: 100%|██████████| 135/135 [00:02<00:00, 63.66it/s]


Valid Loss: 2.447838
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 80/100: 100%|██████████| 399/399 [00:28<00:00, 14.04it/s]


Train Loss: 1.254154


Validation Epoch 80/100: 100%|██████████| 135/135 [00:02<00:00, 64.09it/s]


Valid Loss: 2.489204
⚠️ No improvement. Early stop counter: 2/20
📊 Save loss history (up to epoch 80)
--------------------------------------------------


Training Epoch 81/100: 100%|██████████| 399/399 [00:28<00:00, 13.85it/s]


Train Loss: 1.250103


Validation Epoch 81/100: 100%|██████████| 135/135 [00:01<00:00, 67.85it/s]


Valid Loss: 2.451049
⚠️ No improvement. Early stop counter: 3/20
--------------------------------------------------


Training Epoch 82/100: 100%|██████████| 399/399 [00:28<00:00, 14.10it/s]


Train Loss: 1.244852


Validation Epoch 82/100: 100%|██████████| 135/135 [00:02<00:00, 66.49it/s]


Valid Loss: 2.470059
⚠️ No improvement. Early stop counter: 4/20
--------------------------------------------------


Training Epoch 83/100: 100%|██████████| 399/399 [00:27<00:00, 14.28it/s]


Train Loss: 1.243410


Validation Epoch 83/100: 100%|██████████| 135/135 [00:02<00:00, 66.58it/s]


Valid Loss: 2.430929
💾 Best model saved (val_loss=2.430929)
--------------------------------------------------


Training Epoch 84/100: 100%|██████████| 399/399 [00:28<00:00, 13.87it/s]


Train Loss: 1.239162


Validation Epoch 84/100: 100%|██████████| 135/135 [00:02<00:00, 67.35it/s]


Valid Loss: 2.476950
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 85/100: 100%|██████████| 399/399 [00:28<00:00, 14.16it/s]


Train Loss: 1.234934


Validation Epoch 85/100: 100%|██████████| 135/135 [00:02<00:00, 66.72it/s]


Valid Loss: 2.458411
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 86/100: 100%|██████████| 399/399 [00:28<00:00, 13.91it/s]


Train Loss: 1.230837


Validation Epoch 86/100: 100%|██████████| 135/135 [00:02<00:00, 64.32it/s]


Valid Loss: 2.423989
💾 Best model saved (val_loss=2.423989)
--------------------------------------------------


Training Epoch 87/100: 100%|██████████| 399/399 [00:28<00:00, 14.21it/s]


Train Loss: 1.229151


Validation Epoch 87/100: 100%|██████████| 135/135 [00:02<00:00, 67.02it/s]


Valid Loss: 2.423422
💾 Best model saved (val_loss=2.423422)
--------------------------------------------------


Training Epoch 88/100: 100%|██████████| 399/399 [00:28<00:00, 14.07it/s]


Train Loss: 1.227020


Validation Epoch 88/100: 100%|██████████| 135/135 [00:02<00:00, 65.80it/s]


Valid Loss: 2.416841
💾 Best model saved (val_loss=2.416841)
--------------------------------------------------


Training Epoch 89/100: 100%|██████████| 399/399 [00:28<00:00, 14.06it/s]


Train Loss: 1.224856


Validation Epoch 89/100: 100%|██████████| 135/135 [00:02<00:00, 63.23it/s]


Valid Loss: 2.413690
💾 Best model saved (val_loss=2.413690)
--------------------------------------------------


Training Epoch 90/100: 100%|██████████| 399/399 [00:28<00:00, 13.97it/s]


Train Loss: 1.221127


Validation Epoch 90/100: 100%|██████████| 135/135 [00:02<00:00, 66.21it/s]


Valid Loss: 2.465354
⚠️ No improvement. Early stop counter: 1/20
📊 Save loss history (up to epoch 90)
--------------------------------------------------


Training Epoch 91/100: 100%|██████████| 399/399 [00:28<00:00, 13.93it/s]


Train Loss: 1.219621


Validation Epoch 91/100: 100%|██████████| 135/135 [00:02<00:00, 66.52it/s]


Valid Loss: 2.441679
⚠️ No improvement. Early stop counter: 2/20
--------------------------------------------------


Training Epoch 92/100: 100%|██████████| 399/399 [00:28<00:00, 13.99it/s]


Train Loss: 1.217418


Validation Epoch 92/100: 100%|██████████| 135/135 [00:02<00:00, 66.19it/s]


Valid Loss: 2.430112
⚠️ No improvement. Early stop counter: 3/20
--------------------------------------------------


Training Epoch 93/100: 100%|██████████| 399/399 [00:28<00:00, 13.92it/s]


Train Loss: 1.214840


Validation Epoch 93/100: 100%|██████████| 135/135 [00:02<00:00, 65.54it/s]


Valid Loss: 2.426863
⚠️ No improvement. Early stop counter: 4/20
--------------------------------------------------


Training Epoch 94/100: 100%|██████████| 399/399 [00:28<00:00, 14.01it/s]


Train Loss: 1.212259


Validation Epoch 94/100: 100%|██████████| 135/135 [00:02<00:00, 66.29it/s]


Valid Loss: 2.433382
⚠️ No improvement. Early stop counter: 5/20
--------------------------------------------------


Training Epoch 95/100: 100%|██████████| 399/399 [00:28<00:00, 14.02it/s]


Train Loss: 1.210132


Validation Epoch 95/100: 100%|██████████| 135/135 [00:01<00:00, 68.66it/s]


Valid Loss: 2.415437
⚠️ No improvement. Early stop counter: 6/20
--------------------------------------------------


Training Epoch 96/100: 100%|██████████| 399/399 [00:27<00:00, 14.32it/s]


Train Loss: 1.208587


Validation Epoch 96/100: 100%|██████████| 135/135 [00:01<00:00, 68.00it/s]


Valid Loss: 2.445545
⚠️ No improvement. Early stop counter: 7/20
--------------------------------------------------


Training Epoch 97/100: 100%|██████████| 399/399 [00:27<00:00, 14.35it/s]


Train Loss: 1.207273


Validation Epoch 97/100: 100%|██████████| 135/135 [00:02<00:00, 63.76it/s]


Valid Loss: 2.426440
⚠️ No improvement. Early stop counter: 8/20
--------------------------------------------------


Training Epoch 98/100: 100%|██████████| 399/399 [00:28<00:00, 13.93it/s]


Train Loss: 1.205934


Validation Epoch 98/100: 100%|██████████| 135/135 [00:01<00:00, 68.08it/s]


Valid Loss: 2.407819
💾 Best model saved (val_loss=2.407819)
--------------------------------------------------


Training Epoch 99/100: 100%|██████████| 399/399 [00:29<00:00, 13.63it/s]


Train Loss: 1.203555


Validation Epoch 99/100: 100%|██████████| 135/135 [00:02<00:00, 66.95it/s]


Valid Loss: 2.416631
⚠️ No improvement. Early stop counter: 1/20
--------------------------------------------------


Training Epoch 100/100: 100%|██████████| 399/399 [00:27<00:00, 14.47it/s]


Train Loss: 1.202254


Validation Epoch 100/100: 100%|██████████| 135/135 [00:01<00:00, 67.56it/s]

Valid Loss: 2.401991
💾 Best model saved (val_loss=2.401991)
📊 Save loss history (up to epoch 100)
--------------------------------------------------
⏱️ Total training time: 0:50:55
🎯 Best validation loss: 2.401991
📈 Total epochs completed: 100
💾 Model saved to: best_AFNO_1to1_cond.pth
📊 Loss history saved to: loss_history_AFNO_1to1_cond.csv





In [15]:
# Collect the recorded loss curves into one table (one row per entry, with a
# 1-based epoch index) and write it to `loss_csv_path` without the index column.
# pandas requires both history lists to have the same length.
loss_df = pd.DataFrame(
    {
        'epoch': [idx + 1 for idx in range(len(train_loss_history))],
        'train_loss': train_loss_history,
        'val_loss': val_loss_history,
    }
)
loss_df.to_csv(loss_csv_path, index=False)

In [None]:
# Echo the recorded training-loss history as the cell output for a quick manual check.
train_loss_history