Import necessary packages

In [6]:
import sys
import os
import pandas as pd
import numpy as np
import time
import pickle
from typing import List, Dict
from itertools import groupby
import random
import numba
from tqdm import tqdm
from collections import defaultdict

#Plot
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

#Torch
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch.distributions import Gumbel
from torch.amp import GradScaler, autocast

#Import necessary functions
sys.path.append("aux_functions/")
from functions_datasets_global import global_model as DataBase # define what you import as DataBase
from functions_datasets_global import validate_samples, manual_train_test_split
from functions_loss_global import gaussian_distribution, nll_loss
from functions_aux_global import create_folder, set_random_seed, write_report

Data paths, parameters, and folders

In [7]:
# --- Input locations (placeholders; point these at real files before running) ---
# NOTE(review): 'loation_id' looks like a typo for 'location_id' (compare path_flagged) - confirm.
path_entities_ids = 'path/loation_id/*.txt'  # Text file listing all location ids
path_flagged = 'path/location_id/*.csv'  # Location ids flagged during quality control
path_data = 'path/data/timeseries/'  # Folder holding one dynamic time-series file per location
path_static = 'path/static_attributes/*.csv'  # Single file with the static attributes

# --- Model inputs / output ---
dynamic_input = [
    'precip_daymet',
    'shortwave_rad_daymet',
    't_max_daymet',
    't_min_daymet',
    'vapor_pressure_daymet',
]

target = ['soil_moisture']

# Static attributes = plain site descriptors + per-class indicator columns.
# The indicator column names follow '<prefix>_<code>.0'; only the codes listed here are used.
_site_attributes = ['lat', 'lon', 'depth', 'elevation', 'slope_degree',
                    'clay_fraction', 'silt_fraction', 'sand_fraction']
_climate_zone_codes = (5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18)
_land_cover_codes = (2, 4, 6, 9, 11, 12, 13, 14, 15, 16, 18, 20, 21, 22)

static_input = (
    _site_attributes
    + [f'climate_zones_{code}.0' for code in _climate_zone_codes]
    + [f'land_cover_{code}.0' for code in _land_cover_codes]
)

# --- Hyper-parameters consumed by the model and the training loop ---
model_hyper_parameters = dict(
    input_size=len(dynamic_input) + len(static_input),  # dynamic + static features per time step
    no_of_layers=2,
    seq_length=180,
    hidden_size=128,
    batch_size=64,
    no_of_epochs=10,
    drop_out=0.3,
    learning_rate=0.0005,
    adapt_learning_rate_epoch=1,    # StepLR step size (epochs)
    adapt_gamma_learning_rate=0.5,  # StepLR multiplicative decay
    set_forget_gate=3,              # value written into the first LSTM layer's forget-gate bias
)

running_device = 'cpu'
seed = 42

# Ids rejected by quality control: one id per non-empty line, with any '_filtered' suffix removed
# so they compare equal to the plain entity ids below.
with open(path_flagged, 'r') as f:
    flagged_ids = {
        line.strip().replace('_filtered', '') for line in f if line.strip()
    }

# All candidate entity ids: one id per non-empty line.
with open(path_entities_ids, "r") as f:
    all_entity_ids = [line.strip() for line in f if line.strip()]

# Exclude flagged locations
all_entity_ids = [loc for loc in all_entity_ids if loc not in flagged_ids]

# Ensure reproducibility: seed Python's RNG *before* shuffling. Without this the order of
# all_entity_ids (and everything derived from it) changes on every fresh kernel run.
random.seed(seed)
random.shuffle(all_entity_ids)

path_save_folder = 'path/output_folder'
create_folder(folder_path=path_save_folder)

Folder 'results_daymet_final' already exists.


Dataset class

In [8]:
class BaseDataset(Dataset):
    """Sequence dataset over per-entity time series with optional static attributes.

    A sample is identified by ``(entity_id, end_index)``. Its input is the ``sequence_length``
    rows of dynamic inputs ending at ``end_index`` (inclusive); when static inputs are used,
    the entity's static attributes are repeated along the time axis and appended as extra
    columns. Its target is the row of ``target`` columns at ``end_index``.

    Typical use: build the training set, call ``calculate_global_statistics()`` and then
    ``standardize_data()``. For other splits, assign the training ``scaler`` to the instance
    before calling ``standardize_data(...)``.

    Args:
        dynamic_input: Column names of the time-varying inputs in each time-series CSV.
        static_input: Column names of the static attributes (may be empty).
        set_type: Split label (e.g. 'train', 'val'). Accepted for interface compatibility;
            this class does not use it.
        target: Column name(s) of the target variable.
        sequence_length: Number of time steps per input window.
        path_entities_ids: Either a list of entity ids, or the path to a text file with one
            id per line.
        path_data: Folder containing one ``<entity_id>.csv`` per entity (needs a ``date`` column).
        path_static_attributes: CSV with a ``location_id`` column plus the static attributes.
        check_NaN: Forwarded unchanged to ``validate_samples``.
    """

    def __init__(self,
                 dynamic_input: List[str],
                 static_input: List[str],
                 set_type: str,
                 target: List[str],
                 sequence_length: int,
                 path_entities_ids: str,
                 path_data: str,
                 path_static_attributes: str,
                 check_NaN: bool = True):

        self.dynamic_input = dynamic_input
        self.static_input = static_input
        self.target = target
        self.sequence_length = sequence_length
        self.path_data = path_data
        self.check_NaN = check_NaN

        # Entity ids may be handed over directly (e.g. a cross-validation split) or read from file
        if isinstance(path_entities_ids, list):
            self.entities_ids = path_entities_ids
        else:
            self.entities_ids = self._load_entity_ids(path_entities_ids)

        # Load static attributes; this also drops ids that have no attribute row
        self.df_attributes = self._load_static_attributes(path_static_attributes)

        # Containers filled by _load_time_series / the calculate_* methods
        self.sequence_data: Dict[str, Dict[str, torch.Tensor]] = {}
        self.valid_samples = []   # list of (entity_id, end_index) pairs
        self.location_std = {}    # entity_id -> 0-d tensor, filled by calculate_location_std
        self.scaler = {}          # global mean/std tensors, filled by calculate_global_statistics

        self._load_time_series()

        print(f"Loaded {len(self.sequence_data)} locations with {len(self.valid_samples)} valid samples.")

    def _load_entity_ids(self, path_ids: str) -> List[str]:
        """Read a text file and return its non-empty, stripped lines as entity ids.

        Raises:
            FileNotFoundError: If ``path_ids`` is not an existing file.
        """
        if not os.path.isfile(path_ids):
            raise FileNotFoundError(f"ID file not found: {path_ids}")
        with open(path_ids, "r") as f:
            return [line.strip() for line in f if line.strip()]

    def _load_static_attributes(self, path: str) -> pd.DataFrame:
        """Load the static-attribute CSV indexed by ``location_id`` and keep only known entities.

        Entities without an attribute row are removed from ``self.entities_ids`` (a warning
        with their count is printed). Returns the attribute rows of the remaining entities,
        in the order of ``self.entities_ids``.
        """
        df = pd.read_csv(path).set_index('location_id')
        available_ids = set(df.index)
        valid_ids = [loc for loc in self.entities_ids if loc in available_ids]
        missing = set(self.entities_ids) - available_ids
        if missing:
            print(f"Warning: Skipping {len(missing)} locations with missing static attributes.")
        self.entities_ids = valid_ids
        return df.loc[self.entities_ids]

    def _load_time_series(self):
        """Load every entity's time-series CSV into tensors and register its valid samples.

        Entities whose file is missing, or whose file lacks any dynamic/target column, are
        skipped with a warning. For the others, ``validate_samples`` returns per-time-step
        flags; every index flagged ``1`` is recorded as a usable window end in
        ``self.valid_samples``. Inputs, targets and dates are stored in ``self.sequence_data``.
        """
        for loc_id in self.entities_ids:
            ts_path = os.path.join(self.path_data, f"{loc_id}.csv")
            if not os.path.isfile(ts_path):
                print(f"Warning: Missing file {ts_path}. Skipping.")
                continue

            df_ts = pd.read_csv(ts_path, parse_dates=['date'])
            if not all(col in df_ts.columns for col in self.dynamic_input + self.target):
                print(f"Warning: {loc_id} missing required columns. Skipping.")
                continue

            x_dynamic = df_ts[self.dynamic_input].values   # (time, n_dynamic)
            y_target = df_ts[self.target].values           # (time, n_target)
            dates = df_ts['date'].values

            attributes = self.df_attributes.loc[loc_id, self.static_input].values if self.static_input else None

            # validate_samples is a project helper; its exact criteria live in functions_datasets_global
            flags = validate_samples(x_dynamic, y_target, attributes, self.sequence_length, self.check_NaN)

            for idx in np.flatnonzero(flags == 1):
                self.valid_samples.append((loc_id, idx))

            self.sequence_data[loc_id] = {
                'x_d': torch.tensor(x_dynamic, dtype=torch.float32),
                'y': torch.tensor(y_target, dtype=torch.float32),
                'dates': dates
            }
            if self.static_input:
                self.sequence_data[loc_id]['x_s'] = torch.tensor(attributes, dtype=torch.float32)

    def __len__(self):
        """Return the number of valid samples."""
        return len(self.valid_samples)

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` of ``self.valid_samples``.

        Returns:
            ``(x_input, y_target)`` where ``x_input`` is the window of dynamic inputs ending at
            the sample's time index (static attributes appended column-wise when configured)
            and ``y_target`` is the target row at that index. If ``calculate_location_std`` has
            been called, a third element with the entity's target std (shape ``(1,)``) is added.
        """
        location, i = self.valid_samples[idx]
        data = self.sequence_data[location]

        # Window of sequence_length steps whose last row is time index i
        x_dynamic_seq = data['x_d'][i - self.sequence_length + 1:i + 1]

        if self.static_input:
            # Repeat the static vector for every time step, then append as extra feature columns
            x_static = data['x_s'].repeat(self.sequence_length, 1)
            x_input = torch.cat([x_dynamic_seq, x_static], dim=1)
        else:
            x_input = x_dynamic_seq

        y_target = data['y'][i]

        if self.location_std:
            return x_input, y_target, self.location_std[location].unsqueeze(0)
        return x_input, y_target

    def calculate_location_std(self):
        """Compute the NaN-ignoring standard deviation of the target for every loaded entity.

        Results are stored in ``self.location_std`` as 0-d float32 tensors. Once populated,
        ``__getitem__`` returns them as a third element.
        """
        for loc, data in self.sequence_data.items():
            # torch.Tensor has no nanstd(); use NumPy's nan-aware version, consistent with
            # calculate_global_statistics.
            target_values = data['y'].detach().cpu().numpy()
            self.location_std[loc] = torch.tensor(np.nanstd(target_values), dtype=torch.float32)

    def calculate_global_statistics(self):
        """Compute NaN-ignoring mean/std over all loaded entities and store them in ``self.scaler``.

        Keys written: ``x_d_mean``, ``x_d_std``, ``y_mean``, ``y_std`` and, when static inputs
        are configured, ``x_s_mean``, ``x_s_std`` (computed over the rows of ``self.df_attributes``).

        Raises:
            ValueError: If no time series has been loaded.
        """
        if not self.sequence_data:
            raise ValueError("No time series data available.")

        # Convert to NumPy for nan-safe stats
        all_x = torch.cat([d['x_d'] for d in self.sequence_data.values()], dim=0).numpy()
        all_y = torch.cat([d['y'] for d in self.sequence_data.values()], dim=0).numpy()

        self.scaler = {
            'x_d_mean': torch.tensor(np.nanmean(all_x, axis=0), dtype=torch.float32),
            'x_d_std': torch.tensor(np.nanstd(all_x, axis=0), dtype=torch.float32),
            'y_mean': torch.tensor(np.nanmean(all_y, axis=0), dtype=torch.float32),
            'y_std': torch.tensor(np.nanstd(all_y, axis=0), dtype=torch.float32),
        }

        if self.static_input:
            attr_np = self.df_attributes[self.static_input].values
            self.scaler['x_s_mean'] = torch.tensor(np.nanmean(attr_np, axis=0), dtype=torch.float32)
            self.scaler['x_s_std'] = torch.tensor(np.nanstd(attr_np, axis=0), dtype=torch.float32)

    def standardize_data(self, standardize_output: bool = True):
        """Standardize the stored tensors in place using ``self.scaler``.

        Dynamic inputs (and static inputs, when configured) are always standardized; the
        target only when ``standardize_output`` is True. ``self.scaler`` must be populated
        first. The method rescales the stored data each time it is called, so call it once.
        """
        eps = 1e-6  # Avoids division by zero for constant features
        for data in self.sequence_data.values():
            data['x_d'] = (data['x_d'] - self.scaler['x_d_mean']) / (self.scaler['x_d_std'] + eps)

            if self.static_input:
                data['x_s'] = (data['x_s'] - self.scaler['x_s_mean']) / (self.scaler['x_s_std'] + eps)

            if standardize_output:
                data['y'] = (data['y'] - self.scaler['y_mean']) / (self.scaler['y_std'] + eps)

Model

In [9]:
def get_device(running_device: str = "gpu") -> torch.device:
    """Pick the torch device to run on and print which one was chosen.

    Returns ``cuda:0`` only when ``running_device`` is ``"gpu"`` and CUDA is available;
    in every other case falls back to the CPU.
    """
    gpu_requested = running_device == "gpu"
    if not (gpu_requested and torch.cuda.is_available()):
        print("⚠️ Using CPU (GPU not available or overridden)")
        return torch.device("cpu")

    print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
    return torch.device("cuda:0")


class MDN_LSTM(nn.Module):
    """LSTM encoder followed by a Gaussian mixture-density head.

    The LSTM reads a batch-first sequence; the hidden state of the last time step is passed
    through dropout and three linear layers producing, per mixture component, a weight
    (``pi``), a mean (``mu``) and a standard deviation (``sigma``).

    Args:
        model_hyper_parameters: Dict with keys ``input_size``, ``hidden_size``,
            ``no_of_layers`` and ``drop_out``.
        num_mixtures: Number of Gaussian components in the output.
        temperature: Divides the mixture logits before the softmax
            (< 1 sharpens the weights, > 1 flattens them).
        input_noise_std: Standard deviation of the Gaussian noise added to the inputs
            while the module is in training mode. Set to 0 to disable.
    """

    def __init__(self, model_hyper_parameters, num_mixtures=3, temperature: float = 0.8,
                 input_noise_std: float = 0.005):
        super().__init__()
        self.input_size = model_hyper_parameters['input_size']
        self.hidden_size = model_hyper_parameters['hidden_size']
        self.num_layers = model_hyper_parameters['no_of_layers']
        self.dropout_rate = model_hyper_parameters['drop_out']
        self.num_mixtures = num_mixtures  # Number of Gaussian components in the output (default=3)
        self.temperature = temperature  # Controls how "soft" or "peaked" the mixture weights are
        self.input_noise_std = input_noise_std  # Input-noise regularization strength (training only)

        self.lstm = nn.LSTM(input_size=self.input_size,
                            hidden_size=self.hidden_size,
                            num_layers=self.num_layers,
                            batch_first=True)

        # Applied to the last hidden state only (no dropout between LSTM layers is configured)
        self.dropout = nn.Dropout(self.dropout_rate)

        self.fc_pi = nn.Linear(self.hidden_size, self.num_mixtures)  # Raw logits for mixture weights
        self.fc_mu = nn.Linear(self.hidden_size, self.num_mixtures)  # Means of each Gaussian
        self.fc_sigma = nn.Linear(self.hidden_size, self.num_mixtures)  # Raw values, made positive in _mdn_head

        self._init_weights()

    def _init_weights(self):
        """Initialize LSTM and head parameters.

        Input-hidden weights: Xavier uniform; hidden-hidden weights: orthogonal; all biases
        zero. The three head layers use Xavier-uniform weights and zero biases.
        """
        for name, param in self.lstm.named_parameters():
            if 'weight_ih' in name:
                nn.init.xavier_uniform_(param)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)

        for layer in [self.fc_pi, self.fc_mu, self.fc_sigma]:
            nn.init.xavier_uniform_(layer.weight)
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """Return ``(pi, mu, sigma)`` for a batch-first input ``x`` of shape (batch, time, features)."""
        # Input-noise regularization is a training-time device only: in eval mode the inputs are
        # left untouched so that predictions are deterministic.
        if self.training and self.input_noise_std > 0:
            x = x + torch.randn_like(x) * self.input_noise_std

        out, _ = self.lstm(x)
        out = self.dropout(out[:, -1, :])  # take last time step

        return self._mdn_head(out)

    def _mdn_head(self, lstm_output):
        """Map the encoded state to mixture weights, means and positive standard deviations."""
        # Temperature-scaled softmax: dividing the logits by a temperature < 1 sharpens the
        # weights, > 1 makes them more uniform.
        logits = self.fc_pi(lstm_output)
        pi = F.softmax(logits / self.temperature, dim=-1)

        mu = self.fc_mu(lstm_output)  # Mean of each Gaussian component
        sigma = F.softplus(self.fc_sigma(lstm_output)) + 1e-6  # softplus + offset keeps sigma strictly positive

        return pi, mu, sigma

Training and evaluation

In [None]:
def create_folder(path=None, folder_path=None):
    """Create the folder (including missing parents) if it does not exist yet.

    This definition shadows the ``create_folder`` imported at the top of the notebook, which
    the configuration cell calls as ``create_folder(folder_path=...)``. Both keywords are
    accepted so that re-running earlier cells after this one keeps working.

    Args:
        path: Folder to create (positional use, as in the cross-validation loop).
        folder_path: Alias for ``path``; used only when ``path`` is not given.

    Raises:
        TypeError: If neither ``path`` nor ``folder_path`` is provided.
    """
    target = path if path is not None else folder_path
    if target is None:
        raise TypeError("create_folder() requires a folder path")
    # exist_ok avoids the check-then-create race of `if not exists: makedirs`
    os.makedirs(target, exist_ok=True)

def to_numpy_1d(tensor):
    """Return the tensor's values as a flat NumPy array (detached and moved to the CPU first).

    Used when writing tensor columns to CSV.
    """
    host_array = tensor.detach().cpu().numpy()
    return host_array.flatten()


"""unscale/unscale_feature/unscale_scalar - reverse standardization using stored means/stds"""

def unscale(value, mean, std):
    """Undo a standardization: return ``value * std + mean``."""
    rescaled = value * std
    return rescaled + mean

def unscale_feature(tensor, idx, scaler_mean, scaler_std):
    """Return column ``idx`` of a 2-D tensor as a flat NumPy array in original (unscaled) units.

    ``scaler_mean`` / ``scaler_std`` are indexed with the same ``idx`` to pick the
    statistics belonging to that column.
    """
    column_values = to_numpy_1d(tensor[:, idx])
    column_mean = scaler_mean[idx].item()
    column_std = scaler_std[idx].item()
    return unscale(column_values, column_mean, column_std)

def unscale_scalar(value, idx, scaler_mean, scaler_std):
    """Return a single standardized value in original units, using the statistics at ``idx``."""
    feature_mean = scaler_mean[idx].item()
    feature_std = scaler_std[idx].item()
    return unscale(value, feature_mean, feature_std)

def set_random_seed(seed=42):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices) with ``seed``.

    Also switches cuDNN to deterministic mode and disables its benchmark auto-tuner, so
    folds/runs can be reproduced.
    """
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed every RNG once before building the folds and training
seed = 42
set_random_seed(seed)

# Group entity IDs by location.
# Entity ids look like *location_depth*; all entities of a location are kept together so the
# 5-fold cross-validation splits by location (not by individual series) and avoids spatial leakage.
# NOTE(review): split('_')[0] keeps only the text before the FIRST underscore, so location names
# that themselves contain underscores are grouped by their first token - confirm this is intended.
location_to_entities = defaultdict(list)
for eid in all_entity_ids:
    location = eid.split('_')[0]  # e.g., 'Abrams_0.05' → 'Abrams'
    location_to_entities[location].append(eid)

# Sort the location names: all_entity_ids was shuffled earlier, and KFold only fixes the
# *index* permutation. A stable order here makes fold membership identical across runs.
all_locations = sorted(location_to_entities)
kf = KFold(n_splits=5, shuffle=True, random_state=seed)

# 5-fold cross-validation loop: per fold, build the datasets, train the MDN-LSTM, then
# evaluate on the held-out locations and write the results to CSV.
for fold, (train_loc_idx, val_loc_idx) in enumerate(kf.split(all_locations)):
    fold_no = fold + 1  # 1-based label for folders and messages (the CSV file name keeps the 0-based index)
    print(f"\n=== Fold {fold_no} ===")

    train_locations = [all_locations[i] for i in train_loc_idx]
    val_locations = [all_locations[i] for i in val_loc_idx]

    # Expand locations back to entity ids so all entities of a location land in the same split
    train_ids = [eid for loc in train_locations for eid in location_to_entities[loc]]
    val_ids = [eid for loc in val_locations for eid in location_to_entities[loc]]

    path_save_folder_fold = os.path.join(path_save_folder, f"fold_{fold_no}")
    create_folder(path_save_folder_fold)

    # ------------------------------------------------------------------ datasets
    # Training set: global mean/std are computed from training entities only and then used to
    # standardize x_d, x_s and y.
    training_dataset = BaseDataset(
        dynamic_input=dynamic_input,
        static_input=static_input,
        target=target,
        sequence_length=model_hyper_parameters['seq_length'],
        path_entities_ids=train_ids,
        path_data=path_data,
        path_static_attributes=path_static,
        set_type='train',
        check_NaN=True
    )
    training_dataset.calculate_global_statistics()
    training_dataset.standardize_data()

    # Validation set: reuses the training scalers; y is left in original units so the saved
    # ground truth needs no back-transformation.
    validation_dataset = BaseDataset(
        dynamic_input=dynamic_input,
        static_input=static_input,
        target=target,
        sequence_length=model_hyper_parameters['seq_length'],
        path_entities_ids=val_ids,
        path_data=path_data,
        path_static_attributes=path_static,
        set_type='val',
        check_NaN=True
    )
    validation_dataset.scaler = training_dataset.scaler
    validation_dataset.standardize_data(standardize_output=False)

    train_loader = DataLoader(
        training_dataset,
        batch_size=model_hyper_parameters['batch_size'],
        shuffle=True,
        drop_last=True
    )

    # One validation batch per entity: valid_samples is ordered entity by entity, so grouping
    # consecutive entries by entity id yields the sample indices of each entity.
    validation_batches = [
        [i for i, _ in group]
        for _, group in groupby(
            enumerate(validation_dataset.valid_samples),
            key=lambda x: x[1][0]
        )
    ]
    validation_loader = DataLoader(
        dataset=validation_dataset,
        batch_sampler=validation_batches
    )

    # ------------------------------------------------------------------ model / optimizer
    set_random_seed(seed=seed)
    # NOTE(review): the config cell defines running_device = 'cpu', but the GPU is requested
    # here regardless - confirm which one is intended.
    device = get_device("gpu")
    model = MDN_LSTM(model_hyper_parameters).to(device)

    # torch.compile is a best-effort speed-up; report (instead of hiding) why it was skipped.
    try:
        model = torch.compile(model)
    except Exception as compile_error:
        print(f"Warning: torch.compile unavailable, running uncompiled ({compile_error})")

    optimizer = torch.optim.Adam(model.parameters(), lr=model_hyper_parameters["learning_rate"])
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=model_hyper_parameters["adapt_learning_rate_epoch"],
        gamma=model_hyper_parameters["adapt_gamma_learning_rate"]
    )

    # Forget-gate bias of the first LSTM layer. PyTorch stores the gate biases concatenated as
    # (input, forget, cell, output), so [hidden_size : 2 * hidden_size] is the forget gate.
    hidden_size = model_hyper_parameters['hidden_size']
    model.lstm.bias_hh_l0.data[hidden_size: 2 * hidden_size] = model_hyper_parameters["set_forget_gate"]

    # Mixed precision only makes sense on CUDA; on the CPU fallback both autocast and the
    # gradient scaler are disabled and the loop degrades to plain float32 training.
    use_amp = device.type == 'cuda'
    grad_scaler = GradScaler(device=device.type, enabled=use_amp)

    # ------------------------------------------------------------------ training
    training_start_time = time.time()

    for epoch in range(1, model_hyper_parameters["no_of_epochs"] + 1):
        model.train()
        running_loss = 0.0
        epoch_start = time.time()

        for x_lstm, y in train_loader:
            x_lstm = x_lstm.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            with autocast(device_type=device.type, enabled=use_amp):
                pi, mu, sigma = model(x_lstm)
                loss = nll_loss(pi, mu, sigma, y)

            grad_scaler.scale(loss).backward()
            # Unscale first so the clipping threshold applies to the true gradient norm
            grad_scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
            grad_scaler.step(optimizer)
            grad_scaler.update()

            running_loss += loss.item()

        avg_loss = running_loss / len(train_loader)
        lr = optimizer.param_groups[0]['lr']
        epoch_time = time.time() - epoch_start

        report = f"Fold {fold_no} | Epoch {epoch:02d} | Loss: {avg_loss:.4f} | LR: {lr:.5f} | Time: {epoch_time:.1f}s"
        print(report)
        write_report(f'{path_save_folder_fold}/run_progress.txt', report)

        # NOTE(review): if torch.compile succeeded, the state_dict keys are typically prefixed
        # with '_orig_mod.' - confirm that whatever loads these checkpoints expects that.
        torch.save(model.state_dict(), f'{path_save_folder_fold}/epoch_{epoch}.pt')
        scheduler.step()

    print(f"Fold {fold_no} training done in {time.time() - training_start_time:.1f}s")

    # ------------------------------------------------------------------ evaluation
    test_dataset = validation_dataset
    test_loader = validation_loader

    # Same grouping as validation_batches, so position i below corresponds to batch i of test_loader
    valid_location_testing = [loc_id for loc_id, _ in groupby(test_dataset.valid_samples, key=lambda x: x[0])]
    valid_entity_per_location_testing = [
        [i for _, i in group]
        for _, group in groupby(test_dataset.valid_samples, key=lambda x: x[0])
    ]

    results_dir = os.path.join(path_save_folder_fold, 'results')
    os.makedirs(results_dir, exist_ok=True)

    # Column order of every CSV written below; columns absent from the results are skipped
    desired_columns = [
        'date', 'soil_moisture',
        'lon', 'lat', 'pi_1', 'pi_2', 'pi_3', 'sigma_1', 'sigma_2', 'sigma_3', 'mu_1', 'mu_2', 'mu_3'
    ]

    # Loop-invariant target statistics used to bring mu/sigma back to original units
    y_std = test_dataset.scaler['y_std'].to(device)
    y_mean = test_dataset.scaler['y_mean'].to(device)

    test_results = []

    model.eval()
    with torch.no_grad():
        for i, (x_lstm, _) in enumerate(tqdm(test_loader, desc=f"[Fold {fold_no}] Processing test locations")):
            x_lstm = x_lstm.to(device).float()
            pi, mu, sigma = model(x_lstm)

            # The model was trained on standardized targets: rescale means and spreads
            mu = mu * y_std + y_mean
            sigma = sigma * y_std

            location_id = valid_location_testing[i]
            seq_data = test_dataset.sequence_data[location_id]
            indices = valid_entity_per_location_testing[i]
            dates = seq_data['dates'][indices]

            df_data = {'date': dates}
            df_data['soil_moisture'] = to_numpy_1d(seq_data['y'][indices])
            x_s = seq_data.get('x_s')  # absent when no static inputs are configured

            # Optional: export selected dynamic inputs in original units, e.g.
            #   idx = test_dataset.dynamic_input.index(feature)
            #   df_data[feature] = unscale_feature(seq_data['x_d'][indices], idx,
            #                                      test_dataset.scaler['x_d_mean'],
            #                                      test_dataset.scaler['x_d_std'])
            # (the feature would also have to be added to desired_columns to reach the CSVs)

            # Add lon/lat columns to output - useful for evaluation, although can be done later as well
            for static_feature in ['lon', 'lat']:
                if static_feature in test_dataset.static_input:
                    idx = test_dataset.static_input.index(static_feature)
                    value = unscale_scalar(x_s[idx], idx,
                                           test_dataset.scaler['x_s_mean'],
                                           test_dataset.scaler['x_s_std'])
                    df_data[static_feature] = np.full(len(dates), value)

            for k in range(pi.shape[1]):  # Add MDN params
                df_data[f'pi_{k+1}'] = to_numpy_1d(pi[:, k])
                df_data[f'mu_{k+1}'] = to_numpy_1d(mu[:, k])
                df_data[f'sigma_{k+1}'] = to_numpy_1d(sigma[:, k])

            test_results.append(pd.DataFrame(df_data))

    # Save concatenated CSV across all held-out entities
    all_columns = set().union(*(df.columns for df in test_results))
    used_columns = [col for col in desired_columns if col in all_columns]

    total_df = pd.concat([
        df[used_columns] for df in test_results
    ], ignore_index=True).dropna(subset=['date', 'soil_moisture'])

    # File name intentionally keeps the 0-based fold index used so far
    output_path = os.path.join(results_dir, f'total_locations_fold_{fold}.csv')
    total_df.to_csv(output_path, index=False)
    print(f"Saved fold {fold_no} results to: {output_path}")

    # Save per-entity filtered CSVs (for location-specific evaluation)
    output_folder = os.path.join(results_dir, "filtered_per_location")
    os.makedirs(output_folder, exist_ok=True)

    test_results_dict = {
        valid_location_testing[i]: result
        for i, result in enumerate(test_results)
    }

    for location, df_ts in test_results_dict.items():
        available_columns = [col for col in desired_columns if col in df_ts.columns]
        filtered_df = df_ts[available_columns].copy()
        if 'date' in filtered_df.columns:
            filtered_df = filtered_df.sort_values('date')
        filepath = os.path.join(output_folder, f"{location}_filtered.csv")
        filtered_df.to_csv(filepath, index=False)

    print(f"Saved {len(test_results_dict)} individual filtered files to: {output_folder}")
    



=== Fold 1 ===
Loaded 2212 locations with 4752554 valid samples.
Loaded 545 locations with 1129435 valid samples.
✅ Using GPU: NVIDIA A100-SXM4-40GB
Fold 1 | Epoch 01 | Loss: 0.3268 | LR: 0.00050 | Time: 450.3s
Fold 1 | Epoch 02 | Loss: -0.0423 | LR: 0.00025 | Time: 454.3s
Fold 1 | Epoch 03 | Loss: -0.1775 | LR: 0.00013 | Time: 459.4s
Fold 1 | Epoch 04 | Loss: -0.2555 | LR: 0.00006 | Time: 454.5s
Fold 1 | Epoch 05 | Loss: -0.2997 | LR: 0.00003 | Time: 449.4s
Fold 1 | Epoch 06 | Loss: -0.3243 | LR: 0.00002 | Time: 462.2s
Fold 1 | Epoch 07 | Loss: -0.3378 | LR: 0.00001 | Time: 464.2s
Fold 1 | Epoch 08 | Loss: -0.3441 | LR: 0.00000 | Time: 460.9s
Fold 1 | Epoch 09 | Loss: -0.3477 | LR: 0.00000 | Time: 455.4s
Fold 1 | Epoch 10 | Loss: -0.3496 | LR: 0.00000 | Time: 451.8s
✅ Fold 1 training done in 4562.5s


[Fold 0] Processing test locations: 100%|██████████| 545/545 [00:58<00:00,  9.32it/s]


✅ Saved fold 0 results to: results_daymet_final/fold_1/results/total_locations_fold_0.csv
✅ Saved 545 individual filtered files to: results_daymet_final/fold_1/results/filtered_per_location

=== Fold 2 ===
Loaded 2207 locations with 4599266 valid samples.
Loaded 550 locations with 1282723 valid samples.
✅ Using GPU: NVIDIA A100-SXM4-40GB
Fold 2 | Epoch 01 | Loss: 0.3304 | LR: 0.00050 | Time: 447.3s
Fold 2 | Epoch 02 | Loss: -0.0261 | LR: 0.00025 | Time: 445.9s
Fold 2 | Epoch 03 | Loss: -0.1545 | LR: 0.00013 | Time: 446.6s
Fold 2 | Epoch 04 | Loss: -0.2287 | LR: 0.00006 | Time: 448.9s
Fold 2 | Epoch 05 | Loss: -0.2713 | LR: 0.00003 | Time: 446.6s
Fold 2 | Epoch 06 | Loss: -0.2947 | LR: 0.00002 | Time: 443.6s
Fold 2 | Epoch 07 | Loss: -0.3070 | LR: 0.00001 | Time: 443.0s
Fold 2 | Epoch 08 | Loss: -0.3137 | LR: 0.00000 | Time: 443.0s
Fold 2 | Epoch 09 | Loss: -0.3170 | LR: 0.00000 | Time: 442.9s
Fold 2 | Epoch 10 | Loss: -0.3188 | LR: 0.00000 | Time: 442.8s
✅ Fold 2 training done in 4450.

[Fold 1] Processing test locations: 100%|██████████| 550/550 [01:05<00:00,  8.43it/s]


✅ Saved fold 1 results to: results_daymet_final/fold_2/results/total_locations_fold_1.csv
✅ Saved 550 individual filtered files to: results_daymet_final/fold_2/results/filtered_per_location

=== Fold 3 ===
Loaded 2179 locations with 4708201 valid samples.
Loaded 578 locations with 1173788 valid samples.
✅ Using GPU: NVIDIA A100-SXM4-40GB
Fold 3 | Epoch 01 | Loss: 0.3109 | LR: 0.00050 | Time: 454.5s
Fold 3 | Epoch 02 | Loss: -0.0623 | LR: 0.00025 | Time: 457.4s
Fold 3 | Epoch 03 | Loss: -0.1953 | LR: 0.00013 | Time: 457.3s
Fold 3 | Epoch 04 | Loss: -0.2712 | LR: 0.00006 | Time: 457.4s
Fold 3 | Epoch 05 | Loss: -0.3147 | LR: 0.00003 | Time: 457.8s
Fold 3 | Epoch 06 | Loss: -0.3389 | LR: 0.00002 | Time: 458.6s
Fold 3 | Epoch 07 | Loss: -0.3515 | LR: 0.00001 | Time: 458.0s
Fold 3 | Epoch 08 | Loss: -0.3578 | LR: 0.00000 | Time: 457.5s
Fold 3 | Epoch 09 | Loss: -0.3615 | LR: 0.00000 | Time: 457.6s
Fold 3 | Epoch 10 | Loss: -0.3628 | LR: 0.00000 | Time: 457.6s
✅ Fold 3 training done in 4573.

[Fold 2] Processing test locations:  17%|█▋        | 99/578 [00:10<00:54,  8.76it/s]