In [1]:
import os
import joblib
import pandas as pd
import numpy as np
import random
import itertools

import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

import sys
sys.path.append('/data/Hydra_Work/Competition_Functions') 
from Processing_Functions import process_forecast_date, process_seasonal_forecasts

import ML_Functions
from ML_Functions import Hydra_LSTM_Block, initialize_models_optimizers, PinballLoss, SumPinballLoss, EarlyStopper, Model_Run, No_Body_Model_Run
from Data_Transforming import read_nested_csvs, generate_daily_flow, use_USGS_flow_data, USGS_to_daily_df_yearly


sys.path.append('/data/Hydra_Work/Pipeline_Functions')
from Folder_Work import filter_rows_by_year, csv_dictionary, add_day_of_year_column

from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim.lr_scheduler as lr_scheduler


In [2]:
# All the prep
# Site identifiers passed to csv_dictionary() for the monthly naturalized-flow folder.
monthly_basins = ['animas_r_at_durango', 'boise_r_nr_boise', 'boysen_reservoir_inflow', 'colville_r_at_kettle_falls', 'detroit_lake_inflow', 'dillon_reservoir_inflow',
    'fontenelle_reservoir_inflow', 'green_r_bl_howard_a_hanson_dam', 'hungry_horse_reservoir_inflow', 'libby_reservoir_inflow',
    'missouri_r_at_toston','owyhee_r_bl_owyhee_dam', 'pecos_r_nr_pecos', 'pueblo_reservoir_inflow',
    'ruedi_reservoir_inflow', 'skagit_ross_reservoir', 'snake_r_nr_heise', 'stehekin_r_at_stehekin', 'sweetwater_r_nr_alcova',
    'taylor_park_reservoir_inflow', 'virgin_r_at_virtin', 'weber_r_nr_oakley', 'yampa_r_nr_maybell',
]


# Site identifiers passed to csv_dictionary() for the USGS streamflow folder.
USGS_basins = ['animas_r_at_durango', 'boise_r_nr_boise', 'boysen_reservoir_inflow', 'colville_r_at_kettle_falls', 'detroit_lake_inflow', 'dillon_reservoir_inflow',
    'green_r_bl_howard_a_hanson_dam', 'hungry_horse_reservoir_inflow', 'libby_reservoir_inflow', 'merced_river_yosemite_at_pohono_bridge', 'missouri_r_at_toston',
    'owyhee_r_bl_owyhee_dam', 'pecos_r_nr_pecos', 'pueblo_reservoir_inflow',    'san_joaquin_river_millerton_reservoir', 'snake_r_nr_heise', 'stehekin_r_at_stehekin',
    'sweetwater_r_nr_alcova', 'taylor_park_reservoir_inflow', 'virgin_r_at_virtin', 'weber_r_nr_oakley', 'yampa_r_nr_maybell',
]

# Union of both lists, without duplicates.
# A bare list(set(...)) of strings comes out in a different order on every
# interpreter start (string hashes are randomised per process), which makes any
# loop over `basins` non-reproducible even though the RNG seeds are fixed.
# Sorting pins the order.
basins = sorted(set(monthly_basins + USGS_basins))


# Every second year from 2000 up to (but excluding) 2024.
selected_years = range(2000, 2024, 2)

# ERA5 frames per basin, restricted to the selected years, with a day-of-year column added.
era5_folder = '/data/Hydra_Work/Rodeo_Data/era5'
era5 = add_day_of_year_column(csv_dictionary(era5_folder, basins, years=selected_years))

# Monthly naturalized flow per basin, passed through filter_rows_by_year with 1998.
flow_folder = '/data/Hydra_Work/Rodeo_Data/train_monthly_naturalized_flow'
flow = filter_rows_by_year(csv_dictionary(flow_folder, monthly_basins), 1998)

# Climate indices: parse the dates, index by date, drop the stray CSV index
# column, then keep only the first row for any repeated date.
climatology_file_path = '/data/Hydra_Work/Rodeo_Data/climate_indices.csv'
climate_indices = (
    pd.read_csv(climatology_file_path)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
    .drop(columns='Unnamed: 0')
)
climate_indices = climate_indices.loc[~climate_indices.index.duplicated(keep='first')]

# Seasonal forecasts, read from a nested folder of CSV files.
root_folder = '/data/Hydra_Work/Rodeo_Data/seasonal_forecasts'
seasonal_forecasts = read_nested_csvs(root_folder)

# USGS streamflow frames per basin.
USGS_flow_folder = '/data/Hydra_Work/Rodeo_Data/USGS_streamflows'
USGS_flow = csv_dictionary(USGS_flow_folder, USGS_basins)

# Static per-site attributes, indexed by site_id.
Static_variables = pd.read_csv('/data/Hydra_Work/Rodeo_Data/static_indices.csv', index_col='site_id')

# Convert monthly flow values to daily flow estimates, one frame per basin.
daily_flow = {
    basin_key: generate_daily_flow(monthly_df, persistence_factor=0.7)
    for basin_key, monthly_df in flow.items()
}

# Replace the monthly-derived data with normalised USGS data where available.
daily_flow = use_USGS_flow_data(daily_flow, USGS_flow)

# Introduce San Joaquin, Merced and Detroit Lake from their USGS records,
# normalised by the yearly flow given in train_yearly.
# The return value is not used, so USGS_to_daily_df_yearly presumably updates
# `daily_flow` in place — TODO confirm.
for name in (
    'san_joaquin_river_millerton_reservoir',
    'merced_river_yosemite_at_pohono_bridge',
    'detroit_lake_inflow',
):
    path = f'/data/Hydra_Work/Rodeo_Data/USGS_streamflows/{name}.csv'
    normalising_path = f'/data/Hydra_Work/Rodeo_Data/train_yearly/{name}.csv'
    USGS_to_daily_df_yearly(daily_flow, path, name, normalising_path)

# Apply the saved scalers to every input table. Each scaler is loaded from disk
# with joblib and only .transform() is called here (nothing is fitted in this
# notebook); every result is re-wrapped in a DataFrame so the original column
# names and index are preserved.
# NOTE(review): joblib.load unpickles arbitrary objects — only load scaler files
# from a trusted location. The captured output of this cell also shows
# scikit-learn version-mismatch warnings for these pickles.

# Climate indices: one scaler for the whole table.
climate_scaler_filename = '/data/Hydra_Work/Rodeo_Data/scalers/climate_normalization_scaler.save'
climate_scaler = joblib.load(climate_scaler_filename) 
climate_indices = pd.DataFrame(climate_scaler.transform(climate_indices), columns=climate_indices.columns, index=climate_indices.index)

# ERA5: the same scaler is applied to every frame in the dictionary.
era5_scaler_filename = '/data/Hydra_Work/Rodeo_Data/scalers/era5_scaler.save'
era5_scaler = joblib.load(era5_scaler_filename) 
era5 = {key: pd.DataFrame(era5_scaler.transform(df), columns=df.columns, index=df.index) for key, df in era5.items()}

# Daily flow: one scaler file per basin; existing keys are overwritten in place
# (no keys are added or removed while iterating).
for basin, df in daily_flow.items(): 
    flow_scaler_filename = f'/data/Hydra_Work/Rodeo_Data/scalers/flows/{basin}_flow_scaler.save'
    flow_scaler = joblib.load(flow_scaler_filename) 
    daily_flow[basin] = pd.DataFrame(flow_scaler.transform(df), columns=df.columns, index=df.index)

# Seasonal forecasts: the same scaler is applied to every frame in the dictionary.
seasonal_scaler_filename = "/data/Hydra_Work/Rodeo_Data/scalers/seasonal_scaler.save"
seasonal_scaler = joblib.load(seasonal_scaler_filename)
seasonal_forecasts = {key: pd.DataFrame(seasonal_scaler.transform(df), columns=df.columns, index=df.index ) for key, df in seasonal_forecasts.items()}

# Static site attributes.
static_scaler_filename = '/data/Hydra_Work/Rodeo_Data/scalers/static_scaler.save'
static_scaler = joblib.load(static_scaler_filename) 
Static_variables = pd.DataFrame(static_scaler.transform(Static_variables), columns=Static_variables.columns, index=Static_variables.index)

# For every basin, the 10th / 50th / 90th percentile of `daily_flow` for each
# day of the year. The result is indexed by `day_of_year` and has one column
# per percentile.
percentile_columns = {
    0.1: '10th_percentile_flow',
    0.5: '50th_percentile_flow',
    0.9: '90th_percentile_flow',
}

climatological_flows = {}

for basin, df in daily_flow.items():
    # Group directly on the index's day-of-year, so the basin frame itself is
    # never given (and later stripped of) a temporary helper column.
    per_day_quantiles = (
        df['daily_flow']
        .groupby(df.index.dayofyear)
        .quantile(list(percentile_columns))
        .unstack(level=1)
    )

    climatological_flows[basin] = (
        per_day_quantiles
        .rename(columns=percentile_columns)
        .rename_axis(index='day_of_year')
    )

# Loss used throughout: summed pinball loss over the three forecast quantiles.
criterion = SumPinballLoss(quantiles = [0.1, 0.5, 0.9])

# The date index of one reference basin defines the candidate dates.
basin = 'animas_r_at_durango'
reference_index = daily_flow[basin].index

# Calendar masks over the reference index.
before_june_25 = (reference_index.month < 6) | (
    (reference_index.month == 6) & (reference_index.day < 25)
)
even_year = reference_index.year % 2 == 0
october_onwards = (reference_index.month > 10) | (
    (reference_index.month == 10) & (reference_index.day >= 1)
)

# NOTE(review): `october_onwards` can never be true together with
# `before_june_25`, so in practice only even-year dates before 25 June pass.
# Confirm whether autumn dates of odd years were meant to be included.
All_Dates = reference_index[before_june_25 & (even_year | october_onwards)]
All_Dates = All_Dates[All_Dates.year > 1998]


# Validation Year: 2022 is held out; everything earlier is used for training.
Val_Dates = All_Dates[All_Dates.year == 2022]
All_Dates = All_Dates[All_Dates.year < 2022]


# Drop one basin from the working list if it is present.
basin_to_remove = 'sweetwater_r_nr_alcova'

if basin_to_remove in basins:
    basins.remove(basin_to_remove)


# Seed every random number generator used in the notebook.
seed = 42
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

# Use the GPU when one is visible, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

days = 90
hidden_variables_size = 17

LR = 1e-3

# Feature counts are read off the column counts of the prepared tables.
static_size = Static_variables.shape[1]
forecast_size = seasonal_forecasts['american_river_folsom_lake_2000_apr'].shape[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 3
History_Statistics_in_forcings = 5*2

# Width of the head input: all feature groups above plus 3 further inputs.
head_input_size = (
    forecast_size
    + static_size
    + History_Fourier_in_forcings
    + History_Statistics_in_forcings
    + Climate_guess
    + 3
)
head_output_size = 3

# Be careful of this: the saved MinMaxScaler estimators were pickled with scikit-learn 1.3.0 but are being unpickled here with version 1.4.1.post1 (see the warnings below)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-

# Tuning individual basins

In [3]:
LR = 1e-3

# Feature counts are read off the column counts of the prepared tables.
static_size = Static_variables.shape[1]
forecast_size = seasonal_forecasts['american_river_folsom_lake_2000_apr'].shape[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 3
History_Statistics_in_forcings = 5*2

# Model input width: all feature groups above plus 3 further inputs.
input_size = (
    forecast_size
    + static_size
    + History_Fourier_in_forcings
    + History_Statistics_in_forcings
    + Climate_guess
    + 3
)

output_size = 3
head_hidden_size = 64
head_num_layers = 3




In [4]:
def define_models(hidden_size, num_layers, dropout, bidirectional, learning_rate, copies = 3, output_size = 3, input_size = input_size, days = 90, hidden_variables_size = hidden_variables_size, device = device):
    """Build `copies` independent Hydra_LSTM_Block models with their optimisers and schedulers.

    Parameters
    ----------
    hidden_size, num_layers, dropout, bidirectional
        Forwarded to ``Hydra_LSTM_Block``.
    learning_rate : float
        Learning rate of each Adam optimiser.
    copies : int
        Number of independent models to create.
    output_size, input_size : int
        Forwarded to ``Hydra_LSTM_Block``.
    days, hidden_variables_size : int
        Their product is passed as ``H0_sequences_size``.
    device : torch.device
        Device each model is moved to.

    Returns
    -------
    tuple of four dicts keyed by copy index (0 .. copies - 1):
    ``(models, params_to_optimize, optimizers, schedulers)``.

    Note: the defaults for ``input_size``, ``hidden_variables_size`` and
    ``device`` are captured from the notebook globals when this cell runs.
    """
    h0_sequences_size = days * hidden_variables_size
    models, params_to_optimize, optimizers, schedulers = {}, {}, {}, {}

    for index in range(copies):
        network = Hydra_LSTM_Block(
            input_size,
            hidden_size,
            num_layers,
            output_size,
            H0_sequences_size=h0_sequences_size,
            dropout=dropout,
            bidirectional=bidirectional,
        )
        network.to(device)
        weights = list(network.parameters())

        # Adam with fixed weight decay, annealed on a cosine schedule.
        optimizer = torch.optim.Adam(weights, lr=learning_rate, weight_decay=1e-3)
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=1e4)

        models[index] = network
        params_to_optimize[index] = weights
        optimizers[index] = optimizer
        schedulers[index] = scheduler

    return models, params_to_optimize, optimizers, schedulers

def update_final_parameters(Final_Parameters, basin, min_val_loss_parameters, min_val_loss):
    """Append one basin's best hyperparameters and validation loss to `Final_Parameters`.

    `Final_Parameters` is a dict of lists and is modified in place.
    `min_val_loss_parameters` is indexed positionally as
    (hidden_size, num_layers, dropout, bidirectional, learning_rate).
    Returns None.
    """
    hyperparameter_columns = ('hidden_size', 'num_layers', 'dropout', 'bidirectional', 'learning_rate')

    Final_Parameters['basin'].append(basin)
    for position, column in enumerate(hyperparameter_columns):
        Final_Parameters[column].append(min_val_loss_parameters[position])
    Final_Parameters['val_loss'].append(min_val_loss)


In [17]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper
# Fixed parameters
total_epochs = 30
n_epochs = 1  # Epochs between tests
group_lengths = np.arange(180)
batch_size = 1
copies = 3

# parameters to tune
hidden_sizes = [16, 64, 128]
num_layers = [1, 3]
dropout = [0.1, 0.4]
bidirectional = [False, True]
learning_rate = [1e-2, 1e-3, 1e-5]

# Set up configuration space: every hyperparameter is searched exhaustively.
candidate_values = {
    "hidden_size": hidden_sizes,
    "num_layers": num_layers,
    "dropout": dropout,
    "bidirectional": bidirectional,
    "learning_rate": learning_rate,
}
config_space = {
    name: tune.grid_search(values) for name, values in candidate_values.items()
}




In [7]:

def train_model(config, basin):
    """Train `copies` models on one basin and return the last validation loss.

    NOTE(review): a later cell defines another ``train_model(config)``; once
    that cell has run, this two-argument version is shadowed.

    Parameters
    ----------
    config : dict
        Must provide "hidden_size", "num_layers", "dropout", "bidirectional"
        and "learning_rate".
    basin : str
        Basin identifier; passed to ``No_Body_Model_Run`` as a one-element list.

    Returns
    -------
    The validation loss of the last epoch that ran, averaged over the copies.
    """
    models, params_to_optimize, optimizers, schedulers = define_models(
        config["hidden_size"], config["num_layers"], config["dropout"],
        config["bidirectional"], config["learning_rate"], copies=copies)

    print('Defining Models works')

    total_epochs = 30
    losses, val_losses = [], []
    early_stopper = EarlyStopper(patience=4, min_delta=0.01)
    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}
        print('True Epoch is', epoch)

        for copy in range(copies):
            # One training pass and one validation pass per model copy.
            # The keyword is spelled `specialised` here to match the calls
            # elsewhere in this notebook that are known to run; it was
            # previously spelled `specialized`, which does not match them.
            # NOTE(review): another cell reduces the return value of
            # No_Body_Model_Run as `out[0][0] - out[1][0]`; here the raw
            # return value is averaged. Confirm which reduction is intended.
            train_losses[copy] = No_Body_Model_Run(All_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper=early_stopper, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy] = No_Body_Model_Run(Val_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper=early_stopper, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values()))
        val_loss = np.mean(list(epoch_val_losses.values()))

        print('Training loss is', loss)
        losses.append(loss)
        # Validation
        print('Validation loss is', val_loss)
        val_losses.append(val_loss)

        if early_stopper.early_stop(val_loss):
            break

    return val_loss


In [53]:
def train_model(config):
    """Train `copies` models for one basin inside a Ray Tune trial.

    The shared inputs are fetched from the Ray object store through the
    ``*_id`` handles created by ``ray.put`` in the driver notebook.

    Parameters
    ----------
    config : dict
        Must provide "hidden_size", "num_layers", "dropout", "bidirectional"
        and "learning_rate". An optional "basin" entry selects the basin;
        it defaults to 'stehekin_r_at_stehekin'.

    Returns
    -------
    The validation loss of the last epoch that ran, averaged over the copies.
    The same value is reported to Tune after every epoch under both "score"
    (the metric the tuner queries) and "val_loss".
    """
    # Imported here so the name is resolved inside the worker process.
    from ray import train

    All_Dates = ray.get(All_Dates_id)
    Val_Dates = ray.get(Val_Dates_id)
    era5 = ray.get(era5_id)
    daily_flow = ray.get(daily_flow_id)
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    basin = config.get('basin', 'stehekin_r_at_stehekin')

    copies = 3

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(device)

    models, params_to_optimize, optimizers, schedulers = define_models(
        config["hidden_size"], config["num_layers"], config["dropout"],
        config["bidirectional"], config["learning_rate"], copies=copies, device=device)

    print('Defining Models works')

    losses, val_losses = [], []
    early_stopper = EarlyStopper(patience=4, min_delta=0.01)

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):
            # One training pass and one validation pass per model copy.
            # NOTE(review): another cell reduces the return value of
            # No_Body_Model_Run as `out[0][0] - out[1][0]`; here the raw
            # return value is averaged. Confirm which reduction is intended.
            train_losses[copy] = No_Body_Model_Run(All_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper=early_stopper, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy] = No_Body_Model_Run(Val_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper=early_stopper, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values()))
        val_loss = np.mean(list(epoch_val_losses.values()))

        # Report under "score" because that is the metric name passed to
        # get_best_result(); "val_loss" is kept for readability of the logs.
        train.report({'score': val_loss, 'val_loss': val_loss})
        print('Training loss is', loss)
        losses.append(loss)
        # Validation
        print('Validation loss is', val_loss)
        val_losses.append(val_loss)

        if early_stopper.early_stop(val_loss):
            break

    # Return the real validation loss (a leftover debugging line used to
    # overwrite it with the constant 5, making every trial look identical).
    return val_loss

    


In [55]:
from ray import train, tune

# Restart Ray so the workers start with the runtime environment below.
ray.shutdown()
# /data/Hydra_Work/Competition_Functions/
worker_runtime_env = {"env_vars": {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/'}}
ray.init(runtime_env=worker_runtime_env)
#ray.init(runtime_env = my_runtime_env)

# Put the shared inputs in the Ray object store once; trials fetch them via
# these handles. The puts happen in the order listed.
(
    All_Dates_id,
    Val_Dates_id,
    era5_id,
    daily_flow_id,
    climatological_flows_id,
    climate_indices_id,
    seasonal_forecasts_id,
    Static_variables_id,
) = [
    ray.put(shared_object)
    for shared_object in (
        All_Dates,
        Val_Dates,
        era5,
        daily_flow,
        climatological_flows,
        climate_indices,
        seasonal_forecasts,
        Static_variables,
    )
]


2024-03-18 14:17:53,300	INFO worker.py:1724 -- Started a local Ray instance.


In [56]:

def objective(config):
    """Ray Tune objective: run `train_model` for `config` and return its result as "score"."""
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    print('Device available is', device)

    # The training loop lives in train_model; its return value is the score.
    return {"score": train_model(config)}


def dummy_objective(config):
    """Debug objective: print the worker's `dummy_val` environment variable and return a score of 0.

    `config` is accepted for Tune compatibility and is not used. If
    `dummy_val` is not set in the environment, a placeholder is printed
    instead of raising KeyError.
    """
    print('In dummy objective')
    import os
    # current_directory = os.getcwd()
    # print("Current working directory:", current_directory)
    print(os.environ.get('dummy_val', '<dummy_val is not set>'))
    score = 0
    return {"score": score}

# Can use fractions of GPU
trial_resources = {"cpu": 1, "gpu": 1/16}
objective_with_resources = tune.with_resources(tune.with_parameters(objective), resources=trial_resources)
tuner = tune.Tuner(objective_with_resources, param_space=config_space)

results = tuner.fit()

# NOTE: get_best_result raises RuntimeError when no trial reported "score"
# (for example when every trial errored, as in the captured output below).
best_result = results.get_best_result(metric="score", mode="min")
print(best_result.config)

0,1
Current time:,2024-03-18 14:20:00
Running for:,00:02:06.03
Memory:,9.9/125.9 GiB

Trial name,# failures,error file
objective_4f98e_00000,1,"/home/gbmc/ray_results/objective_2024-03-18_14-17-54/objective_4f98e_00000_0_bidirectional=False,dropout=0.1000,hidden_size=16,learning_rate=0.0010,num_layers=1_2024-03-18_14-17-54/error.txt"
objective_4f98e_00001,1,"/home/gbmc/ray_results/objective_2024-03-18_14-17-54/objective_4f98e_00001_1_bidirectional=False,dropout=0.1000,hidden_size=64,learning_rate=0.0010,num_layers=1_2024-03-18_14-17-54/error.txt"
objective_4f98e_00002,1,"/home/gbmc/ray_results/objective_2024-03-18_14-17-54/objective_4f98e_00002_2_bidirectional=False,dropout=0.1000,hidden_size=128,learning_rate=0.0010,num_layers=1_2024-03-18_14-17-54/error.txt"
objective_4f98e_00003,1,"/home/gbmc/ray_results/objective_2024-03-18_14-17-54/objective_4f98e_00003_3_bidirectional=False,dropout=0.1000,hidden_size=16,learning_rate=0.0010,num_layers=3_2024-03-18_14-17-54/error.txt"
objective_4f98e_00004,1,"/home/gbmc/ray_results/objective_2024-03-18_14-17-54/objective_4f98e_00004_4_bidirectional=False,dropout=0.1000,hidden_size=64,learning_rate=0.0010,num_layers=3_2024-03-18_14-17-54/error.txt"
objective_4f98e_00005,1,"/home/gbmc/ray_results/objective_2024-03-18_14-17-54/objective_4f98e_00005_5_bidirectional=False,dropout=0.1000,hidden_size=128,learning_rate=0.0010,num_layers=3_2024-03-18_14-17-54/error.txt"

Trial name,status,loc,bidirectional,dropout,hidden_size,learning_rate,num_layers
objective_4f98e_00000,ERROR,136.156.133.98:279411,False,0.1,16,0.001,1
objective_4f98e_00001,ERROR,136.156.133.98:279417,False,0.1,64,0.001,1
objective_4f98e_00002,ERROR,136.156.133.98:279426,False,0.1,128,0.001,1
objective_4f98e_00003,ERROR,136.156.133.98:279427,False,0.1,16,0.001,3
objective_4f98e_00004,ERROR,136.156.133.98:279432,False,0.1,64,0.001,3
objective_4f98e_00005,ERROR,136.156.133.98:279437,False,0.1,128,0.001,3


[36m(objective pid=279411)[0m Device available is cuda




[36m(objective pid=279411)[0m cuda
[36m(objective pid=279432)[0m Defining Models works
[36m(objective pid=279432)[0m Start of run
[36m(objective pid=279432)[0m defaultdict(<class 'int'>, {})
[36m(objective pid=279432)[0m Epoch 1: Training Mode
[36m(objective pid=279432)[0m loss difference : -1.5062580216060582
[36m(objective pid=279432)[0m Climatology loss: 19.50871040964917
[36m(objective pid=279437)[0m Device available is cuda[32m [repeated 5x across cluster][0m
[36m(objective pid=279432)[0m cuda[32m [repeated 5x across cluster][0m
[36m(objective pid=279417)[0m Defining Models works[32m [repeated 5x across cluster][0m
[36m(objective pid=279432)[0m Start of run[32m [repeated 6x across cluster][0m
[36m(objective pid=279432)[0m defaultdict(<class 'int'>, {})[32m [repeated 6x across cluster][0m
[36m(objective pid=279432)[0m Epoch 1: Validation Mode
[36m(objective pid=279417)[0m Epoch 1: Training Mode[32m [repeated 5x across cluster][0m
[36m(object

2024-03-18 14:19:57,593	ERROR tune_controller.py:1374 -- Trial task failed for trial objective_4f98e_00004
Traceback (most recent call last):
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/worker.py", line 2624, in get
    raise value.as_instanceof_cause()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File

RuntimeError: No best trial found for the given metric: score. This means that no trial has reported this metric, or all values reported for this metric are NaN. To not ignore NaN values, you can set the `filter_nan_and_inf` arg to False.

In [None]:
# Fixed parameters
total_epochs = 20
n_epochs = 1 # Epochs between tests
group_lengths = np.arange(180)
batch_size = 1
copies = 2

# parameters to tune
hidden_sizes = [64] #[16, 64, 128]
num_layers =  [2] #[1,3]
dropout = [0.3] # [0.1, 0.4]
bidirectional = [True] #[False, True]
learning_rate = [1e-3] #[1e-2, 1e-3, 1e-5]

model_combinations = list(itertools.product(hidden_sizes, num_layers, dropout, bidirectional, learning_rate))

# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_Single_Models/'
Final_Parameters = {'basin': [], 'hidden_size': [], 'num_layers': [], 'dropout': [], 'bidirectional': [], 'learning_rate': [], 'val_loss': []}

for basin in basins:
    # Best result so far for THIS basin; reset so nothing carries over from
    # the previous basin.
    min_val_loss = float('inf')
    min_val_loss_parameters = None
    min_val_losses = []

    print(basin)

    for model_combination in model_combinations:
        # Unpack into trial_* names: reusing `num_layers`, `dropout`,
        # `bidirectional` and `learning_rate` here would overwrite the
        # candidate lists defined above and break any later cell (or a re-run
        # of this one) that expects lists.
        trial_hidden_size, trial_num_layers, trial_dropout, trial_bidirectional, trial_learning_rate = model_combination

        models, params_to_optimize, optimizers, schedulers = define_models(trial_hidden_size, trial_num_layers, trial_dropout, trial_bidirectional, trial_learning_rate, copies = copies)
        early_stopper = EarlyStopper(patience = 3, min_delta= 2)
        losses, val_losses = [], []

        for epoch in range(total_epochs):
            # Training
            train_losses = {}
            epoch_val_losses = {}
            print('True Epoch is', epoch)

            for copy in range(copies):
                # loop through copies
                # Need to fix the outputs of No_Body_Model_Run
                # Need to set specialised = False as only give one model, and basins need to be replaced with a list of just the basin in it
                # The loss kept is the first element of output 0 minus the
                # first element of output 1.
                # NOTE(review): the worker logs print a "Climatology loss" and a
                # "loss difference", so this is presumably model loss minus
                # climatology loss — confirm against No_Body_Model_Run.
                dummy = No_Body_Model_Run(All_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts, Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper = early_stopper, n_epochs=n_epochs, batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised = False)
                train_losses[copy] = dummy[0][0] - dummy[1][0]
                dummy_val = No_Body_Model_Run(Val_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts, Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper = early_stopper, n_epochs=n_epochs, batch_size=batch_size, group_lengths=group_lengths, Train_Mode= False, device=device, specialised = False)
                epoch_val_losses[copy] = dummy_val[0][0] - dummy_val[1][0]

            loss = np.mean(list(train_losses.values()))
            val_loss = np.mean(list(epoch_val_losses.values()))


            print('Training loss is', loss)
            losses.append(loss)
            # Validation
            print('Validation loss is', val_loss)
            val_losses.append(val_loss)

            if val_loss < min_val_loss:
                min_val_loss = val_loss
                min_val_loss_parameters = model_combination
                # Snapshot the history; `val_losses` keeps growing afterwards.
                min_val_losses = list(val_losses)

                # Checkpoint the last copy (index 1 for the default copies = 2).
                save_path = os.path.join(model_dir, f'{basin}_model.pth')
                torch.save(models[copies - 1], save_path)


            if early_stopper.early_stop(val_loss):
                break

    # A basin with no comparable (e.g. all-NaN) validation loss has nothing to record.
    if min_val_loss_parameters is None:
        print('No valid validation loss recorded for', basin)
        continue

    # Save best parameters and corresponding validation loss for that basin to a dictionary
    update_final_parameters(Final_Parameters, basin, min_val_loss_parameters, min_val_loss)

# Convert dictionary to a csv file
df = pd.DataFrame(Final_Parameters)
df.to_csv(os.path.join(model_dir, 'Specialised_final_parameters.csv'), index=False)


In [None]:
# Loading models
# One checkpoint per basin, using the same '<basin>_model.pth' name the tuning
# loop saves under. (The previous f-string had no placeholder, so every basin
# tried to load the literal file 'basin.pth'.)
# NOTE(review): torch.load unpickles whole model objects here — only load
# checkpoints that this project produced itself.
Tuned_Models = {}
for basin in basins:
    Tuned_Models[basin] = torch.load(f'/data/Hydra_Work/Post_Rodeo_Work/Tuned_Single_Models/{basin}_model.pth')


# New attempt at raytuner

In [None]:
from ray import train

# Restart Ray and ship the Competition_Functions directory to the workers.
ray.shutdown()
#ray.init()
worker_runtime_env = {"py_modules": ['/data/Hydra_Work/Competition_Functions']}
ray.init(runtime_env=worker_runtime_env)

# Put the shared inputs in the Ray object store once; trials fetch them via
# these handles. The puts happen in the order listed.
(
    All_Dates_id,
    Val_Dates_id,
    era5_id,
    daily_flow_id,
    climatological_flows_id,
    climate_indices_id,
    seasonal_forecasts_id,
    Static_variables_id,
) = [
    ray.put(shared_object)
    for shared_object in (
        All_Dates,
        Val_Dates,
        era5,
        daily_flow,
        climatological_flows,
        climate_indices,
        seasonal_forecasts,
        Static_variables,
    )
]

def trainable(config):  # Pass a "config" dictionary into your trainable.
    """Ray Tune trainable: train `copies` models for one basin and report the validation loss.

    The shared inputs are fetched from the Ray object store through the
    ``*_id`` handles created by ``ray.put`` in the driver notebook.

    Parameters
    ----------
    config : dict
        Must provide "hidden_size", "num_layers", "dropout", "bidirectional"
        and "learning_rate".

    Reports ``{"score": val_loss}`` to Tune after every epoch, including the
    epoch on which early stopping triggers. Returns None.
    """
    # Imported here so the names are resolved inside the worker process.
    import sys
    from ray import train

    All_Dates = ray.get(All_Dates_id)
    Val_Dates = ray.get(Val_Dates_id)
    era5 = ray.get(era5_id)
    daily_flow = ray.get(daily_flow_id)
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    basin = 'stehekin_r_at_stehekin'

    copies = 3
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Make the project modules importable from inside the worker.
    sys.path.append('/data/Hydra_Work/Competition_Functions')
    import ML_Functions
    print('Imported')
    from ML_Functions import Hydra_LSTM_Block, EarlyStopper, SumPinballLoss, No_Body_Model_Run, Prepare_Batch, Prepare_Basin, Get_Relevant_Dates, Process_History
    from Processing_Functions import process_forecast_date, process_seasonal_forecasts, fit_fourier_to_h0, Get_History_Statistics

    def define_models(hidden_size, num_layers, dropout, bidirectional, learning_rate, copies = 3, output_size = 3, input_size = input_size, days = 90, hidden_variables_size = hidden_variables_size, device = device):
        """Local copy of the model factory, built on the Hydra_LSTM_Block imported above."""
        models = {}
        params_to_optimize = {}
        optimizers = {}
        schedulers = {}
        for copy in range(copies):
            models[copy] = Hydra_LSTM_Block(input_size, hidden_size, num_layers, output_size, H0_sequences_size=days * hidden_variables_size, dropout= dropout, bidirectional= bidirectional)
            models[copy].to(device)
            params_to_optimize[copy] = list(models[copy].parameters())

            optimizers[copy] = torch.optim.Adam(params_to_optimize[copy], lr= learning_rate, weight_decay = 1e-3)
            schedulers[copy] = lr_scheduler.CosineAnnealingLR(optimizers[copy], T_max=1e4)

        return models, params_to_optimize, optimizers, schedulers

    models, params_to_optimize, optimizers, schedulers = define_models(
        config["hidden_size"], config["num_layers"], config["dropout"],
        config["bidirectional"], config["learning_rate"], copies=copies)

    losses, val_losses = [], []
    early_stopper = EarlyStopper(patience=4, min_delta=0.01)

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}
        print('True Epoch is', epoch)

        for copy in range(copies):
            # One training pass and one validation pass per model copy.
            # The validation pass has to run: without it `epoch_val_losses`
            # stays empty and the reported score is the mean of nothing (NaN).
            # The keyword is spelled `specialised` to match the other
            # No_Body_Model_Run calls in this notebook that are known to run.
            # NOTE(review): another cell reduces the return value of
            # No_Body_Model_Run as `out[0][0] - out[1][0]`; here the raw
            # return value is averaged. Confirm which reduction is intended.
            train_losses[copy] = No_Body_Model_Run(All_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper=early_stopper, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy] = No_Body_Model_Run(Val_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper=early_stopper, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values()))
        val_loss = np.mean(list(epoch_val_losses.values()))

        print('Training loss is', loss)
        losses.append(loss)
        # Validation
        print('Validation loss is', val_loss)
        val_losses.append(val_loss)

        # Report before the early-stop check so the final epoch is not lost.
        train.report({"score": val_loss})  # Send the score to Tune.

        if early_stopper.early_stop(val_loss):
            break

# Search space: dropout and learning rate are searched exhaustively, the other
# three hyperparameters are sampled at random.
# NOTE(review): with grid_search entries present, num_samples repeats the whole
# grid that many times — confirm against the Ray Tune docs for the installed version.
space = dict(
    hidden_size=tune.choice(hidden_sizes),
    num_layers=tune.choice(num_layers),
    dropout=tune.grid_search(dropout),
    bidirectional=tune.choice(bidirectional),
    learning_rate=tune.grid_search(learning_rate),
)
sampling_config = tune.TuneConfig(num_samples=10)
tuner = tune.Tuner(trainable, param_space=space, tune_config=sampling_config)
tuner.fit()



# Tuning General Model

In [None]:
LR = 1e-3

# Feature counts are read off the column counts of the prepared tables.
static_size = Static_variables.shape[1]
forecast_size = seasonal_forecasts['american_river_folsom_lake_2000_apr'].shape[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 3
History_Statistics_in_forcings = 5*2

# Model input width: the sum of every feature group plus 3 further inputs.
feature_group_sizes = [
    forecast_size,
    static_size,
    History_Fourier_in_forcings,
    History_Statistics_in_forcings,
    Climate_guess,
]
input_size = sum(feature_group_sizes) + 3

output_size = 3
head_hidden_size = 64
head_num_layers = 3


In [None]:



def update_final_parameters_general(Final_Parameters, min_val_loss_parameters, min_val_loss):
    """Append one hyper-parameter combination and its validation loss to the results table.

    Parameters
    ----------
    Final_Parameters : dict of lists
        Must contain the keys 'hidden_size', 'num_layers', 'dropout', 'bidirectional',
        'learning_rate' and 'val_loss'. Mutated in place.
    min_val_loss_parameters : sequence
        Positions 0-4 hold the values for the five hyper-parameter keys, in the order above.
    min_val_loss : value appended under 'val_loss'.

    Returns
    -------
    None
    """
    ordered_keys = ('hidden_size', 'num_layers', 'dropout', 'bidirectional', 'learning_rate')
    for position, key in enumerate(ordered_keys):
        Final_Parameters[key].append(min_val_loss_parameters[position])
    Final_Parameters['val_loss'].append(min_val_loss)

In [None]:
# Manual grid search for the general model: every hyper-parameter combination is trained for up to
# `total_epochs` epochs; whenever the copy-averaged validation loss improves on the best seen so far,
# the last copy's model is written to `model_dir`.

# Fixed parameters
total_epochs = 20
n_epochs = 1 # Epochs between tests
group_lengths = np.arange(180)
batch_size = 1
copies = 2

# parameters to tune
# I tuned to 128,2,0.1,False,1e-3 
hidden_sizes = [64, 128, 256]
num_layers = [1,3]
dropout = [0.1, 0.4]
bidirectional =  [False, True]
learning_rate = [1e-2, 1e-3, 1e-5]

model_combinations = list(itertools.product(hidden_sizes, num_layers, dropout, bidirectional, learning_rate))

# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/'
Final_Parameters = {'hidden_size': [], 'num_layers': [], 'dropout': [], 'bidirectional': [], 'learning_rate': [], 'val_loss': []}

# Best-so-far bookkeeping. Initialised here so a stale value left in the kernel by another tuning
# cell can never be recorded as this search's result.
min_val_loss = float('inf')
min_val_loss_parameters = None
min_val_losses = []

for model_combination in model_combinations:
    # Distinct scalar names: unpacking into `num_layers`, `dropout`, ... would overwrite the
    # search-space lists above, which other cells (e.g. the Tune search space) also read.
    hidden_size, n_layers, dropout_rate, use_bidirectional, lr = model_combination

    models, params_to_optimize, optimizers, schedulers = define_models(hidden_size, n_layers, dropout_rate, use_bidirectional, lr, copies = copies)
    early_stopper = EarlyStopper(patience=10, min_delta=0.01)
    losses, val_losses = [], []

    for epoch in range(total_epochs):
        # Training
        train_losses = {}
        epoch_val_losses = {}
        print('True Epoch is', epoch)

        for copy in range(copies):
            # loop through copies
            # Need to fix the outputs of No_Body_Model_Run
            # NOTE(review): the keyword is spelled `specialised` here but `specialized` in the
            # Tune `trainable` earlier in the notebook -- confirm against No_Body_Model_Run's signature.
            train_losses[copy] = No_Body_Model_Run(
                All_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices,
                seasonal_forecasts, Static_variables, optimizers[copy], schedulers[copy], criterion,
                early_stopper = early_stopper, n_epochs=n_epochs, batch_size=batch_size,
                group_lengths=group_lengths, Train_Mode=True, device=device, specialised = False)
            epoch_val_losses[copy] = No_Body_Model_Run(
                Val_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices,
                seasonal_forecasts, Static_variables, optimizers[copy], schedulers[copy], criterion,
                early_stopper = early_stopper, n_epochs=n_epochs, batch_size=batch_size,
                group_lengths=group_lengths, Train_Mode= False, device=device, specialised = False)

        # Average over the model copies (np.mean without an axis already reduces to a scalar).
        loss = np.mean(list(train_losses.values()))
        val_loss = np.mean(list(epoch_val_losses.values()))

        print('Training loss is', loss)
        losses.append(loss)
        # Validation
        print('Validation loss is', val_loss)
        val_losses.append(val_loss)

        if val_loss < min_val_loss:
            min_val_loss = val_loss
            min_val_loss_parameters = model_combination
            # Snapshot: `val_losses` keeps growing during later epochs of this combination.
            min_val_losses = list(val_losses)

            save_path = os.path.join(model_dir, 'General_model.pth')
            # Save the last copy (index 1 when copies == 2); also valid when copies == 1.
            torch.save(models[copies - 1], save_path)

        if early_stopper.early_stop(val_loss):
            break

if min_val_loss_parameters is None:
    raise RuntimeError('General model search never produced a validation loss below infinity; nothing to record.')

# Save the best parameters and the corresponding validation loss to the results dictionary,
# using the helper whose keys match `Final_Parameters` for the general model.
update_final_parameters_general(Final_Parameters, min_val_loss_parameters, min_val_loss)

# Convert dictionary to a csv file
df = pd.DataFrame(Final_Parameters)
df.to_csv(os.path.join(model_dir, 'General_final_parameters.csv'), index=False)


In [None]:
# Reload the general model written by the grid-search cell above (it was stored with
# `torch.save(<module>, path)`, i.e. as a whole pickled object rather than a state_dict).
# NOTE(review): recent PyTorch releases default `torch.load` to weights_only=True, which rejects
# fully pickled modules -- confirm against the installed version.
general_model_path = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/General_model.pth'
General_Model = torch.load(general_model_path)

# Tuning Hydra Model

In [None]:
def define_models_hydra(body_input_size, body_hidden_size, body_num_layers, body_output_size, body_dropout, body_bidirectional,
                                 head_input_size, head_hidden_size, head_num_layers, head_output_size, head_dropout, head_bidirectional,
                        learning_rate_body, learning_rate_head, learning_rate_general_head, LR, basins = basins,  hidden_variables_size = hidden_variables_size, days = 90, device = device, copies = 3):
    """Call `initialize_models_optimizers` once per copy and collect its outputs by copy index.

    All size, dropout, bidirectional and learning-rate arguments, together with `basins`, `days`,
    `hidden_variables_size`, `LR` and `device`, are forwarded unchanged to
    `initialize_models_optimizers`. The defaults for `basins`, `hidden_variables_size` and `device`
    are taken from the notebook globals at the time this cell is executed.

    Returns
    -------
    tuple of five dicts keyed by copy index (0 .. copies-1), in this order:
        Hydra_Bodys, General_Hydra_Heads, model_heads, optimizers, schedulers
    Note that the general heads come *before* the model heads here, whereas
    `initialize_models_optimizers` returns them the other way round.
    """
    Hydra_Bodys, model_heads, General_Hydra_Heads = {}, {}, {}
    optimizers, schedulers = {}, {}

    for copy_index in range(copies):
        body, heads, general_head, optimizer, scheduler = initialize_models_optimizers(
            basins, body_input_size, body_hidden_size, body_num_layers, body_output_size, body_dropout, body_bidirectional,
            head_input_size, head_hidden_size, head_num_layers, head_output_size, head_dropout, head_bidirectional,
            days, hidden_variables_size, learning_rate_body, learning_rate_head, learning_rate_general_head, LR, device)

        Hydra_Bodys[copy_index] = body
        model_heads[copy_index] = heads
        General_Hydra_Heads[copy_index] = general_head
        optimizers[copy_index] = optimizer
        schedulers[copy_index] = scheduler

    return Hydra_Bodys, General_Hydra_Heads, model_heads, optimizers, schedulers

def update_final_parameters_hydra(Final_Parameters, min_val_loss_parameters, min_val_loss):
    """Append the best Hydra hyper-parameter combination and its validation loss to the results table.

    Parameters
    ----------
    Final_Parameters : dict of lists
        Results table, mutated in place. Missing keys are created, so every column that is
        written ends up with the same length.
    min_val_loss_parameters : sequence
        Either an 11-value combination in the order the Hydra search grid is built with
        ``itertools.product`` in this notebook:
            (body_hidden_size, body_num_layers, body_dropout, bidirectional, body_learning_rate,
             body_output, head_hidden_size, head_num_layers, head_dropout, bidirectional,
             head_learning_rate)
        or, for any other length, the older 9-value layout without the bidirectional flags:
            (body_hidden_size, body_num_layers, body_dropout, body_learning_rate, body_output,
             head_hidden_size, head_num_layers, head_dropout, head_learning_rate)
    min_val_loss : value appended under 'val_loss'.

    Returns
    -------
    None
    """
    if len(min_val_loss_parameters) == 11:
        # The grid carries the bidirectional flag twice (positions 3 and 9). The training loop
        # unpacks both into one name, so position 9 is the value actually used for body and head.
        layout = (
            ('body_hidden_size', 0),
            ('body_num_layers', 1),
            ('body_dropout', 2),
            ('body_learning_rate', 4),
            ('body_output', 5),
            ('head_hidden_size', 6),
            ('head_num_layers', 7),
            ('head_dropout', 8),
            ('bidirectional', 9),
            ('head_learning_rate', 10),
        )
    else:
        # Legacy 9-value layout (the positions this helper originally used).
        layout = (
            ('body_hidden_size', 0),
            ('body_num_layers', 1),
            ('body_dropout', 2),
            ('body_learning_rate', 3),
            ('body_output', 4),
            ('head_hidden_size', 5),
            ('head_num_layers', 6),
            ('head_dropout', 7),
            ('head_learning_rate', 8),
        )

    for key, position in layout:
        Final_Parameters.setdefault(key, []).append(min_val_loss_parameters[position])

    # Append validation loss
    Final_Parameters.setdefault('val_loss', []).append(min_val_loss)

In [None]:
# Configuration for the Hydra (shared body + heads) hyper-parameter search.

# Fixed parameters
total_epochs = 20
n_epochs = 1 # Epochs between tests
group_lengths = np.arange(180)
batch_size = 1
copies = 1
body_input_size = input_size
head_output_size = 3

# parameters to tune
# chose 128, 2, 0.1, 1e-3, 6, 32, 1, 0.4, 1e-3
# Each list currently holds a single value; the full candidate grid is kept in the trailing comment.
body_hidden_sizes = [256] #[64, 128, 256]
body_num_layers =  [2] #[1, 3]
body_dropouts = [0.1] # [0.1, 0.4]
body_learning_rates = [1e-3] # [1e-2, 1e-3, 1e-5]
body_outputs = [10] #[3, 6, 10]


head_hidden_sizes = [16, 32, 64][2:] # [16, 32, 64]
head_num_layers = [1] #[1, 3]
head_dropouts = [0.4] # [0.1, 0.4, 0.7]
head_learning_rates = [1e-3] #[1e-2, 1e-3, 1e-5]

bidirectionals = [True] #[False, True]


# Generate combinations for body and head
# NOTE(review): `bidirectionals` appears twice (tuple positions 3 and 9), but the training loop
# unpacks both into a single `bidirectional` name, so widening the list to [False, True] would
# train duplicate configurations -- the 11-value layout is kept because the loop unpacks 11 values.
model_combinations = list(itertools.product(body_hidden_sizes, body_num_layers, body_dropouts, bidirectionals, body_learning_rates, body_outputs, head_hidden_sizes, head_num_layers, head_dropouts, bidirectionals, head_learning_rates))

# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_Hydra_Model/'
# 'body_output' is included because update_final_parameters_hydra appends to it; without the key
# recording the best combination raises KeyError.
Final_Parameters = {'body_hidden_size': [], 'body_num_layers': [], 'body_dropout': [], 'body_learning_rate': [], 'body_output': [],
                    'head_hidden_size': [], 'head_num_layers': [], 'head_dropout': [], 'bidirectional': [], 'head_learning_rate': [],
                    'val_loss': []}


min_val_loss = float('inf')


In [None]:

# Grid search over the Hydra hyper-parameter combinations. For each combination the body, general
# head and per-basin heads are trained for up to `total_epochs` epochs; whenever the general-head
# validation loss improves on the best seen so far, copy 0 of every model is written to `model_dir`.

# Best-so-far bookkeeping, reset here so re-running this cell starts a fresh search and a stale
# value from another tuning cell can never be recorded.
min_val_loss = float('inf')
min_val_loss_parameters = None
min_val_losses = []

for model_combination in model_combinations:
    # The bidirectional flag occurs twice in the tuple; the second occurrence wins and is used
    # for both body and head below.
    body_hidden_size, body_num_layer, body_dropout, bidirectional, body_learning_rate, body_output_size, head_hidden_size, head_num_layer, head_dropout, bidirectional, head_learning_rate = model_combination
    general_head_learning_rate = head_learning_rate
    head_input_size = body_output_size
    Hydra_Bodys, General_Hydra_Heads, model_heads, optimizers, schedulers  = define_models_hydra(body_input_size, body_hidden_size, body_num_layer, body_output_size, body_dropout, bidirectional,
                                 head_input_size, head_hidden_size, head_num_layer, head_output_size, head_dropout, bidirectional,
                                body_learning_rate, head_learning_rate, general_head_learning_rate, LR)

    early_stopper = EarlyStopper(patience=10, min_delta=0.01)
    general_losses, specific_losses, general_val_losses, specific_val_losses = [], [], [], []

    # Keyword arguments shared by the training and the validation pass.
    run_kwargs = dict(early_stopper = early_stopper, n_epochs= n_epochs, batch_size=batch_size,
                      group_lengths=group_lengths, device=device, feed_forcing = False)

    for epoch in range(total_epochs):
        # Training
        train_general_losses = {}
        train_specific_losses = {}
        epoch_val_general_losses = {}
        epoch_val_specific_losses = {}
        climate_losses = {}
        print('True Epoch is', epoch)

        for copy in range(copies):
            # loop through copies
            # Need to fix the outputs of Model_Run
            train_general_losses[copy], train_specific_losses[copy], climate_losses[copy] = Model_Run(
                All_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy],
                era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts, Static_variables,
                optimizers[copy], schedulers[copy], criterion, Train_Mode=True, **run_kwargs)

            # Validation must run on the held-out dates (as the general-model search does),
            # not on the training dates again.
            epoch_val_general_losses[copy], epoch_val_specific_losses[copy], climate_losses[copy] = Model_Run(
                Val_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy],
                era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts, Static_variables,
                optimizers[copy], schedulers[copy], criterion, Train_Mode=False, **run_kwargs)

        # Average over the model copies (np.mean without an axis already reduces to a scalar).
        general_loss = np.mean(list(train_general_losses.values()))
        specific_loss = np.mean(list(train_specific_losses.values()))

        epoch_val_general_loss = np.mean(list(epoch_val_general_losses.values()))
        epoch_val_specific_loss = np.mean(list(epoch_val_specific_losses.values()))

        print('General Training loss is', general_loss)
        print('Specific Training loss is', specific_loss)
        general_losses.append(general_loss)
        specific_losses.append(specific_loss)

        # Validation
        print('General Validation loss is', epoch_val_general_loss)
        general_val_losses.append(epoch_val_general_loss)
        print('Specific Validation loss is', epoch_val_specific_loss)
        specific_val_losses.append(epoch_val_specific_loss)

        val_loss = epoch_val_general_loss
        val_losses = general_val_losses
        # Need to make a decision on how to determine a model performs better, specific vs general head performance
        # Could also just do both

        # Model selection compares the *validation* loss against the best validation loss so far.
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            min_val_loss_parameters = model_combination
            # Snapshot: `val_losses` keeps growing during later epochs of this combination.
            min_val_losses = list(val_losses)

            for basin in basins:
                # Construct the full path for saving the model
                save_path = os.path.join(model_dir, f'{basin}_Head.pth')
                # Save the model
                torch.save(model_heads[0][f'{basin}'], save_path)

            save_path = os.path.join(model_dir, 'General_Head.pth')
            # Save the model
            torch.save(General_Hydra_Heads[0], save_path)

            save_path = os.path.join(model_dir, 'General_Body.pth')
            # Save the model
            torch.save(Hydra_Bodys[0], save_path)

        if early_stopper.early_stop(val_loss):
            break

if min_val_loss_parameters is None:
    raise RuntimeError('Hydra model search never produced a validation loss below infinity; nothing to record.')

# Save the best parameters and the corresponding validation loss to the results dictionary,
# using the Hydra-specific helper (its keys are the body_*/head_* columns of `Final_Parameters`).
update_final_parameters_hydra(Final_Parameters, min_val_loss_parameters, min_val_loss)

# Convert dictionary to a csv file, stored next to the Hydra models so the general model's
# results file is not overwritten.
df = pd.DataFrame(Final_Parameters)
df.to_csv(os.path.join(model_dir, 'Hydra_final_parameters.csv'), index=False)

In [None]:
# Scratch inspection of the per-copy training losses. `train_losses` is only assigned by the earlier
# tuning loops (the Hydra loop uses `train_general_losses` / `train_specific_losses`), so this shows
# whatever an earlier cell left in the kernel.
# NOTE(review): looks like leftover debugging -- consider removing.
train_losses.values()