In [1]:
import sys
import os
import pandas as pd

# Make the parent of the current working directory importable, so that the
# `src` package resolves when the kernel is started from a sub-folder.
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, os.pardir))
if not (project_root in sys.path):
    # Appended (not prepended): already-importable modules keep precedence.
    sys.path.append(project_root)
    print(f"Added project root to sys.path: {project_root}")

Added project root to sys.path: c:\Users\peera\Desktop\DroughtLSTM_oneday


In [2]:
# main.ipynb content to be used in Jupyter Notebook format

# Section 1: Setup
import torch
import pandas as pd
from torch.utils.data import DataLoader
from src.stadnet.stacked_model import STADNet
from src.stadnet.pe_utils import add_temporal_pe, add_spatial_pe
from src.stadnet.masked_loss import masked_mse
from src.stadnet.evaluate import evaluate_model
from src.stadnet.save_predictions import save_predictions_to_csv
from src.stadnet.visualize_utils import plot_comparison
from src.grid_utils import create_gridded_data
from src.data_utils import split_data_chronologically
# Section 4: Dataset Class from Grid
from torch.utils.data import Dataset
class GriddedSeq2SeqDataset(Dataset):
    """Sliding-window dataset over a gridded time series.

    Each sample pairs ``input_steps`` consecutive time slices (the model
    input) with the single time slice that immediately follows them (the
    target).

    Args:
        gridded_tensor: Array-like indexed as [time, H, W, C]; it is copied
            into a float32 tensor.
        input_steps: Number of consecutive time slices in each input window.
        target_steps: Number of trailing time slices reserved when counting
            samples. ``__getitem__`` always returns exactly one target slice,
            regardless of this value.
        target_indices: Channel indices selected for the target. ``None``
            (the default) selects channels ``[0, 1]``.
    """

    def __init__(self, gridded_tensor, input_steps=12, target_steps=1, target_indices=None):
        self.data = torch.tensor(gridded_tensor, dtype=torch.float32)
        self.input_steps = input_steps
        self.target_steps = target_steps
        # A fresh list per instance avoids sharing one mutable default
        # between every dataset object.
        self.target_indices = [0, 1] if target_indices is None else target_indices
        self.grid_shape = self.data.shape[1:3]  # (H, W)

    def __len__(self):
        """Return the number of complete (input, target) windows, never negative."""
        n_windows = self.data.shape[0] - self.input_steps - self.target_steps + 1
        # A series shorter than one window would give a negative count, which
        # makes len() raise ValueError; report an empty dataset instead.
        return max(0, n_windows)

    def __getitem__(self, idx):
        """Return ``(X, Y)`` for the window starting at time index ``idx``.

        Returns:
            X: tensor of shape [input_steps, C, H, W].
            Y: tensor of shape [len(target_indices), H, W], taken from the
               time slice right after the input window.
        """
        window_end = idx + self.input_steps
        # [T, H, W, C] -> [T, C, H, W]; data is already float32 (see __init__).
        X = self.data[idx:window_end].permute(0, 3, 1, 2)
        next_slice = self.data[window_end]  # [H, W, C]
        Y = next_slice[..., self.target_indices].permute(2, 0, 1)  # [C_target, H, W]
        return X, Y

        




In [5]:
# Show the kernel's current working directory (IPython automagic for `%pwd`).
pwd

'c:\\Users\\peera\\Desktop\\DroughtLSTM_oneday\\notebooks'

In [7]:
import yaml
import torch
import pandas as pd
from torch.utils.data import DataLoader, ConcatDataset
import optuna
import os
from src.stadnet.train import train_stadnet_model
from src.stadnet.stacked_model import STADNet
from src.stadnet.evaluate import evaluate_model
from src.stadnet.save_predictions import save_predictions_to_csv
from pathlib import Path
from src.data_utils import split_data_chronologically
from src.grid_utils import create_gridded_data
class GriddedSeq2SeqDataset(Dataset):
    """Sliding-window dataset over a gridded time series.

    Each sample pairs ``input_steps`` consecutive time slices (the model
    input) with the single time slice that immediately follows them (the
    target).

    Args:
        gridded_tensor: Array-like indexed as [time, H, W, C]; it is copied
            into a float32 tensor.
        input_steps: Number of consecutive time slices in each input window.
        target_steps: Number of trailing time slices reserved when counting
            samples. ``__getitem__`` always returns exactly one target slice,
            regardless of this value.
        target_indices: Channel indices selected for the target. ``None``
            (the default) selects channels ``[0, 1]``.
    """

    def __init__(self, gridded_tensor, input_steps=12, target_steps=1, target_indices=None):
        self.data = torch.tensor(gridded_tensor, dtype=torch.float32)
        self.input_steps = input_steps
        self.target_steps = target_steps
        # A fresh list per instance avoids sharing one mutable default
        # between every dataset object.
        self.target_indices = [0, 1] if target_indices is None else target_indices
        self.grid_shape = self.data.shape[1:3]  # (H, W)

    def __len__(self):
        """Return the number of complete (input, target) windows, never negative."""
        n_windows = self.data.shape[0] - self.input_steps - self.target_steps + 1
        # A series shorter than one window would give a negative count, which
        # makes len() raise ValueError; report an empty dataset instead.
        return max(0, n_windows)

    def __getitem__(self, idx):
        """Return ``(X, Y)`` for the window starting at time index ``idx``.

        Returns:
            X: tensor of shape [input_steps, C, H, W].
            Y: tensor of shape [len(target_indices), H, W], taken from the
               time slice right after the input window.
        """
        window_end = idx + self.input_steps
        # [T, H, W, C] -> [T, C, H, W]; data is already float32 (see __init__).
        X = self.data[idx:window_end].permute(0, 3, 1, 2)
        next_slice = self.data[window_end]  # [H, W, C]
        Y = next_slice[..., self.target_indices].permute(2, 0, 1)  # [C_target, H, W]
        return X, Y

class STADNetForecastingPipeline:
    """Drive the STADNet workflow: load data, tune, retrain, evaluate, export.

    The pipeline reads a YAML configuration, grids the CSV data into
    train/validation/test tensors, searches hyperparameters with Optuna,
    retrains on train+validation with the best trial, and writes predictions.

    Args:
        config_path: Path to the YAML configuration file. The keys read by
            this class are ``data.csv_path``, ``training.input_steps``,
            ``training.output_steps``, ``training.batch_size``,
            ``model.*`` and ``output.predictions_csv``.
    """

    def __init__(self, config_path):
        # Context manager so the file handle is closed deterministically.
        with open(config_path) as config_file:
            self.config = yaml.safe_load(config_file)
        self.model = None
        self.land_mask = None
        self.raw_df = None
        self.study = None
        self.best_params = None

    @staticmethod
    def _config_with_params(config, params):
        """Return a deep copy of ``config`` with tuned hyperparameters applied.

        ``params`` uses the names registered in :meth:`objective`:
        ``n_layers``, ``hidden_channels`` (one width shared by every layer),
        ``learning_rate``, ``dropout_rate`` and ``batch_size``. ``config``
        itself is left untouched.
        """
        import copy

        tuned = copy.deepcopy(config)
        n_layers = params['n_layers']
        tuned['model']['hidden_channels'] = [params['hidden_channels']] * n_layers
        tuned['model']['n_layers'] = n_layers
        tuned['training']['learning_rate'] = params['learning_rate']
        tuned['training']['dropout_rate'] = params['dropout_rate']
        tuned['training']['batch_size'] = params['batch_size']
        return tuned

    def _make_dataset(self, gridded_tensor):
        """Wrap one gridded tensor in a windowed dataset using config settings."""
        return GriddedSeq2SeqDataset(
            gridded_tensor,
            input_steps=self.config['training']['input_steps'],
            target_steps=self.config["training"]["output_steps"],
            target_indices=list(range(len(self.config["model"]["output_targets"])))
        )

    def load_data(self):
        """Read the CSV, split it chronologically, and build gridded datasets.

        Also moves the land mask to the GPU and creates the output directory
        named by ``output.predictions_csv``.
        """
        self.full_df = pd.read_csv(self.config["data"]["csv_path"])
        self.raw_df = self.full_df  # For saving predictions

        self.train_df, self.val_df, self.test_df = split_data_chronologically(self.full_df, self.config)

        # Every call also returns a land mask; the one from the full frame
        # (the last call) is kept. Assumes the mask has the same shape for
        # every split.
        self.gridded_tensor_train, self.land_mask = create_gridded_data(self.train_df, self.config)
        self.gridded_tensor_val, self.land_mask = create_gridded_data(self.val_df, self.config)
        self.gridded_tensor_test, self.land_mask = create_gridded_data(self.test_df, self.config)
        self.gridded_tensor_full, self.land_mask = create_gridded_data(self.full_df, self.config)

        self.train_dataset = self._make_dataset(self.gridded_tensor_train)
        self.val_dataset = self._make_dataset(self.gridded_tensor_val)
        self.test_dataset = self._make_dataset(self.gridded_tensor_test)
        self.full_dataset = self._make_dataset(self.gridded_tensor_full)

        self.land_mask = torch.tensor(self.land_mask, dtype=torch.float32).cuda()
        # `predictions_csv` is used as a directory, despite its name.
        Path(self.config["output"]["predictions_csv"]).mkdir(parents=True, exist_ok=True)

    def build_model(self):
        """Instantiate STADNet on the GPU from the current ``model`` config."""
        self.model = STADNet(
            input_channels=self.config["model"]["input_channels"],
            hidden_channels=self.config["model"]["hidden_channels"],
            n_layers=self.config["model"]["n_layers"],
            output_targets=self.config["model"]["output_targets"]
        ).cuda()

    def train(self, n_trials=10):
        """Tune with Optuna, then retrain on train+validation with the best trial.

        Args:
            n_trials: Number of Optuna trials to run (default 10).

        Side effects: sets ``self.study``, ``self.best_params``,
        ``self.model``; replaces ``self.config`` with the tuned configuration;
        writes ``final_model.pth`` to the current working directory.
        """
        self.study = optuna.create_study(direction="minimize")
        self.study.optimize(
            lambda trial: self.objective(
                trial, self.config, self.train_dataset, self.val_dataset, self.land_mask
            ),
            n_trials=n_trials,
        )

        self.best_params = self.study.best_trial.params
        print("Best hyperparameters found by Optuna:", self.best_params)

        # The study stores one 'hidden_channels' width (see objective); the
        # per-layer list is rebuilt from it. Deep copy keeps the loaded
        # config intact while the final config is assembled.
        cfg_final = self._config_with_params(self.config, self.best_params)
        print("Reconstructed hidden_channels:", cfg_final['model']['hidden_channels'])

        self.train_val_dataset = ConcatDataset([self.train_dataset, self.val_dataset])
        train_val_loader = DataLoader(self.train_val_dataset, batch_size=cfg_final['training']['batch_size'], shuffle=True)
        test_loader = DataLoader(self.test_dataset, batch_size=cfg_final['training']['batch_size'], shuffle=False)

        # Join with a separator so the log lands inside the output directory
        # even when the configured path has no trailing slash.
        log_path = os.path.join(self.config["output"]["predictions_csv"], "prediction_log.csv")

        # NOTE(review): the test loader is passed as `val_loader`. If
        # train_stadnet_model uses it for early stopping or checkpoint
        # selection, the test metrics below are optimistic; confirm.
        self.model = train_stadnet_model(
            train_val_loader, cfg_final,
            val_loader=test_loader, land_mask=self.land_mask, log_path=log_path
        )
        torch.save(self.model.state_dict(), "final_model.pth")

        # Later steps (evaluate, predict_and_save, build_model) read
        # self.config, so expose the tuned values there.
        self.config = cfg_final

        # Final evaluation
        metrics = evaluate_model(self.model, test_loader, self.land_mask, output_targets=cfg_final['model']['output_targets'])
        print("Final Evaluation on Test Set:")
        for target, vals in metrics.items():
            print(f"  {target}: {vals}")

    def evaluate(self):
        """Evaluate ``self.model`` on the validation split and return the metrics.

        NOTE(review): after ``train()`` the model has been fit on
        train+validation, so these are not held-out numbers.
        """
        val_loader = DataLoader(self.val_dataset, batch_size=self.config["training"]["batch_size"], shuffle=False)
        metrics = evaluate_model(self.model, val_loader, self.land_mask, self.config["model"]["output_targets"])
        print("Evaluation Metrics on Validation Set:")
        for target, vals in metrics.items():
            print(f"  {target}: {vals}")
        return metrics

    def predict_and_save(self):
        """Write validation-split predictions via ``save_predictions_to_csv``."""
        val_loader = DataLoader(self.val_dataset, batch_size=self.config["training"]["batch_size"], shuffle=False)
        save_predictions_to_csv(
            self.model,
            val_loader,
            self.land_mask,
            self.config["model"]["output_targets"],
            self.raw_df,
            self.config["output"]["predictions_csv"]
        )

    def objective(self, trial, config, train_dataset, val_dataset, land_mask):
        """Optuna objective: train one candidate and return its mean validation RMSE.

        Args:
            trial: Optuna trial used to sample hyperparameters.
            config: Base configuration; it is copied, not modified.
            train_dataset: Dataset used to fit the candidate.
            val_dataset: Dataset used to score the candidate.
            land_mask: Mask forwarded to training and evaluation.

        Returns:
            The average of the per-target ``'RMSE'`` values from ``evaluate_model``.
        """
        # Dict literals evaluate in order, so parameters are registered in
        # the sequence n_layers, hidden_channels, learning_rate, ...
        params = {
            'n_layers': trial.suggest_int('n_layers', 2, 5),
            'hidden_channels': trial.suggest_categorical('hidden_channels', [128, 256, 512]),
            'learning_rate': trial.suggest_float('learning_rate', 2e-5, 5e-3, log=True),
            'dropout_rate': trial.suggest_float('dropout_rate', 0.1, 0.3),
            'batch_size': trial.suggest_int('batch_size', 8, 32, step=8),
        }
        # A per-trial copy keeps one trial's settings out of the next.
        trial_config = self._config_with_params(config, params)
        batch_size = params['batch_size']

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        model = train_stadnet_model(train_loader, trial_config, val_loader=val_loader, land_mask=land_mask)
        metrics = evaluate_model(model, val_loader, land_mask, output_targets=trial_config['model']['output_targets'])
        rmse_values = [vals['RMSE'] for vals in metrics.values()]
        return sum(rmse_values) / len(rmse_values)

    def run(self):
        """Run the whole pipeline: load, tune and train, evaluate, export."""
        self.load_data()
        self.train()
        self.evaluate()
        self.predict_and_save()


In [8]:
# Build the pipeline from the PET experiment config (path is relative to the
# notebook's working directory) and run every stage: load, train, evaluate, save.
pipeline =  STADNetForecastingPipeline(config_path='../config/stadnet/config_stadnet_pet.yaml')
pipeline.run()

Splitting data: Train ends 2017-12-31 00:00:00, Validation ends 2020-12-31 00:00:00
Train set shape: (251316, 19), Time range: 1901-01-16 00:00:00 to 2017-12-16 00:00:00
Validation set shape: (6444, 19), Time range: 2018-01-16 00:00:00 to 2020-12-16 00:00:00
Test set shape: (6444, 19), Time range: 2021-01-16 00:00:00 to 2023-12-16 00:00:00
--- Starting Data Gridding Process (Fixed Step Method) ---
Using fixed grid step of: 0.5 degrees
Grid boundaries: LAT (6.25, 20.25), LON (97.75, 105.25)
Calculated grid dimensions: Height=29, Width=16
Created 2D validity mask (29x16) with 179 valid data pixels.
Pivoting data into a 4D tensor of shape (1404, 29, 16, 15)...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['row_idx'] = ((df[lat_col] - lat_min) / fixed_step).round().astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['col_idx'] = ((df[lon_col] - lon_min) / fixed_step).round().astype(int)


--- Data Gridding Process Finished ---
--- Starting Data Gridding Process (Fixed Step Method) ---
Using fixed grid step of: 0.5 degrees
Grid boundaries: LAT (6.25, 20.25), LON (97.75, 105.25)
Calculated grid dimensions: Height=29, Width=16
Created 2D validity mask (29x16) with 179 valid data pixels.
Pivoting data into a 4D tensor of shape (36, 29, 16, 15)...
--- Data Gridding Process Finished ---
--- Starting Data Gridding Process (Fixed Step Method) ---
Using fixed grid step of: 0.5 degrees
Grid boundaries: LAT (6.25, 20.25), LON (97.75, 105.25)
Calculated grid dimensions: Height=29, Width=16
Created 2D validity mask (29x16) with 179 valid data pixels.
Pivoting data into a 4D tensor of shape (36, 29, 16, 15)...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['row_idx'] = ((df[lat_col] - lat_min) / fixed_step).round().astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['col_idx'] = ((df[lon_col] - lon_min) / fixed_step).round().astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['row_idx'] = ((df[lat_col] - lat_min) / fixed_step).

--- Data Gridding Process Finished ---
--- Starting Data Gridding Process (Fixed Step Method) ---
Using fixed grid step of: 0.5 degrees
Grid boundaries: LAT (6.25, 20.25), LON (97.75, 105.25)
Calculated grid dimensions: Height=29, Width=16
Created 2D validity mask (29x16) with 179 valid data pixels.
Pivoting data into a 4D tensor of shape (1476, 29, 16, 15)...


[I 2025-06-19 03:41:34,316] A new study created in memory with name: no-name-9d3d26f7-73d2-4f29-865e-ae5112313e5d


--- Data Gridding Process Finished ---


[W 2025-06-19 03:41:35,218] Trial 0 failed with parameters: {'n_layers': 2, 'hidden_channels': 512, 'learning_rate': 0.0004171692439304797, 'dropout_rate': 0.23462311778297668, 'batch_size': 32} because of the following error: RuntimeError("shape '[32, -1, 1, 1]' is invalid for input of size 8").
Traceback (most recent call last):
  File "C:\Users\peera\AppData\Roaming\Python\Python312\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\peera\AppData\Local\Temp\ipykernel_47324\343227285.py", line 87, in <lambda>
    self.study.optimize(lambda trial: self.objective(trial, self.config, self.train_dataset, self.val_dataset, self.land_mask), n_trials=10)
                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\peera\AppData\Local\Temp\ipykernel_47324\343227285.py", line 155, in objective
    model = train_s

RuntimeError: shape '[32, -1, 1, 1]' is invalid for input of size 8