In [2]:
import sys
import os
import pandas as pd
import numpy as np

# Make the project's `src` package importable from this notebook.
# The project root is taken to be the parent of the current working directory,
# so this assumes the kernel was started in the notebook's own folder.
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, os.pardir))
# Only register the root once, so re-running the cell does not grow sys.path.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
    print(f"Added project root to sys.path: {project_root}")

# The pipeline class lives in the project's `src` package, so this import only
# resolves once the project root is on sys.path (set up earlier in this cell).
from src.cnn1d_pytorch_pipeline_global import CNN1DGlobalPipeline

# CNN1D-specific config file. The path is anchored on `project_root` (computed
# earlier in this cell) rather than a literal "../", so it is absolute and there
# is a single definition of where the project lives.
config_file = os.path.join(project_root, "config_CNN1D_Global_SPEI.yaml")

# Create an instance of the pipeline
pipeline = CNN1DGlobalPipeline(config_path=config_file)

# Execute the pipeline. This is a single, long run that performs data loading,
# feature engineering, scaling, hyperparameter tuning, final model training,
# and evaluation. The returned object is reported on below.
results = pipeline.run_pipeline()
# Report the outcome of the run.
# The loop below expects `results` to be a non-empty dict mapping a split name
# to a dict of {metric name: numeric value}; anything else is printed verbatim
# so a long run never ends in a formatting traceback.
print("\n--- CNN1D Global Pipeline Finished ---")
if isinstance(results, dict) and results:
    print("Final Model Performance:")
    for split_name, metrics in results.items():
        print(f"\nMetrics for {split_name} set:")
        if isinstance(metrics, dict) and metrics:
            for metric_name, value in metrics.items():
                try:
                    shown = format(value, ".4f")
                except (TypeError, ValueError):
                    # None or another non-numeric value: show it as-is instead
                    # of crashing and losing the remaining metrics.
                    shown = str(value)
                print(f"  {str(metric_name).upper()}: {shown}")
        elif metrics:
            # Truthy but not a dict (e.g. an error string): there is nothing to
            # iterate, so surface the raw payload rather than raising on .items().
            print(f"  Unexpected metrics payload: {metrics!r}")
        else:
            print("  Metrics not available for this split.")
else:
    print(f"Pipeline may have returned an error status or incomplete results: {results}")

Configuration loaded from c:\Users\peera\Desktop\DroughtLSTM_oneday\config_CNN1D_Global_SPEI.yaml
Pipeline artifacts will be saved under 'c:\Users\peera\Desktop\DroughtLSTM_oneday\run_outputs\SPEI_CNN1D_Global_Run' and 'c:\Users\peera\Desktop\DroughtLSTM_oneday\models_saved\SPEI_CNN1D_Global_Run'

--- Starting CNN1D PyTorch GLOBAL Pipeline ---
Pipeline: Loading, sorting, and splitting data...
Successfully loaded data from c:\Users\peera\Desktop\DroughtLSTM_oneday\data\full.csv. Shape: (264201, 19)
Converted column 'time' to datetime.
Data sorted by ['time', 'lat', 'lon'].
Splitting data: Train ends 2017-12-31 00:00:00, Validation ends 2020-12-31 00:00:00
Train set shape: (251313, 19), Time range: 1901-01-16 00:00:00 to 2017-12-16 00:00:00
Validation set shape: (6444, 19), Time range: 2018-01-16 00:00:00 to 2020-12-16 00:00:00
Test set shape: (6444, 19), Time range: 2021-01-16 00:00:00 to 2023-12-16 00:00:00
Pipeline: Engineering features...
  DEBUG (create_lagged_features for other loc

  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)


Dropped 2148 rows due to NaNs after feature engineering (lags).
  DEBUG (create_lagged_features for other loc): Input df shape (6444, 19), head:
         lon   lat       time        tmp        dtr   cld        tmx   tmn  \
1404  101.25  6.25 2018-01-16  26.000000  10.800000  62.2  31.400000  20.6   
1405  101.25  6.25 2018-02-15  26.300001  10.900001  58.8  31.800001  20.9   
1406  101.25  6.25 2018-03-16  27.300001  11.200000  60.0  32.900000  21.7   

             pre    wet        vap      spei    soi    dmi       pdo  nino4  \
1404  221.900010  18.35  24.800001  1.908856  0.915 -0.200  0.038453  -0.31   
1405   82.500000   9.45  26.200000  1.103982 -0.731  0.215 -0.328776  -0.20   
1406   46.600002   5.75  28.200000 -1.374192  1.028 -0.120 -0.778429  -0.09   

      nino34  nino3    pet  
1404   -0.86  -1.17  114.7  
1405   -0.73  -0.77  109.2  
1406   -0.73  -0.87  127.1  
Dropped 2148 rows due to NaNs after feature engineering (lags).
  DEBUG (create_lagged_features for other loc

  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.groupby(group_by_cols, sort=False)[col_to_lag].shift(lag)
  df_lagged[new_col_name] = df_lagged.gr

Data scaling complete.
Pipeline: Creating Datasets and DataLoaders...


[I 2025-06-06 21:44:25,194] A new study created in memory with name: no-name-2859c415-b4c7-47e6-8973-25e8d3cbb376
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Pipeline: Starting Optuna for 1 trials... (Model input_size will be 15)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params | Mode 
-----------------------------------------------------
0 | model     | CNN1DRegressor | 5.2 K  | train
1 | criterion | MSELoss        | 0      | train
-----------------------------------------------------
5.2 K     Trainable params
0         Non-trainable params
5.2 K     Total params
0.021     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode
C:\Users\peera\AppData\Roaming\Python\Python312\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
  return F.conv1d(
  return F.mse_loss(input, target, reduction=self.reduction)
C:\Users\peera\AppData\Roaming\Python\Python312\site-packages\pytorch_lightning\trainer\connectors\data_connec

Pipeline: Optuna found best params: {'learning_rate': 0.0009037823946587664, 'n_conv_layers': 3, 'out_channels_power': 5, 'kernel_size': 2, 'dropout_rate': 0.2345702766375194}
Pipeline: Training final model...
Epoch 0: 100%|██████████| 974/974 [00:07<00:00, 126.79it/s]                 

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1: 100%|██████████| 974/974 [00:07<00:00, 122.12it/s, val_loss=0.510]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 974/974 [00:07<00:00, 121.95it/s, val_loss=0.510]
Pipeline: Final model training complete. Best model saved at: C:\Users\peera\Desktop\DroughtLSTM_oneday\models_saved\SPEI_CNN1D_Global_Run\global-cnn1d-best-model.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
C:\Users\peera\AppData\Roaming\Python\Python312\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:476: Your `predict_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
C:\Users\peera\AppData\Roaming\Python\Python312\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.



--- Final Model Evaluation ---
Predicting DataLoader 0: 100%|██████████| 965/965 [00:04<00:00, 233.91it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
C:\Users\peera\AppData\Roaming\Python\Python312\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


  Train Set: RMSE=0.9936, MAE=0.8090, R2=-0.0000
Predicting DataLoader 0: 100%|██████████| 9/9 [00:00<00:00, 214.28it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
C:\Users\peera\AppData\Roaming\Python\Python312\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.



  Validation Set: RMSE=1.0929, MAE=0.9560, R2=-0.5102
Predicting DataLoader 0: 100%|██████████| 9/9 [00:00<00:00, 127.60it/s]




  Test Set: RMSE=1.2328, MAE=1.0558, R2=-1.0274
Pipeline: Evaluation metrics saved.
Scaler saved to c:\Users\peera\Desktop\DroughtLSTM_oneday\models_saved\SPEI_CNN1D_Global_Run\global_robust_scaler_cnn1d.joblib
Pipeline: Global scaler saved.
--- CNN1D Global Pipeline Run Finished ---

--- CNN1D Global Pipeline Finished ---
Final Model Performance:

Metrics for train set:
  RMSE: 0.9936
  MAE: 0.8090
  R2: -0.0000

Metrics for validation set:
  RMSE: 1.0929
  MAE: 0.9560
  R2: -0.5102

Metrics for test set:
  RMSE: 1.2328
  MAE: 1.0558
  R2: -1.0274
