In [1]:
import os
import pickle
import joblib
import pandas as pd
import numpy as np
import random
import itertools

import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

import sys
sys.path.append('/data/Hydra_Work/Competition_Functions') 
from Processing_Functions import process_forecast_date, process_seasonal_forecasts
from Data_Transforming import read_nested_csvs, generate_daily_flow, use_USGS_flow_data, USGS_to_daily_df_yearly

sys.path.append('/data/Hydra_Work/Pipeline_Functions')
from Folder_Work import filter_rows_by_year, csv_dictionary, add_day_of_year_column

sys.path.append('/data/Hydra_Work/Post_Rodeo_Work/ML_Functions.py')
from Full_LSTM_ML_Functions import Specific_Heads, Google_Model_Block, SumPinballLoss, EarlyStopper, Model_Run, No_Body_Model_Run



from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim.lr_scheduler as lr_scheduler





# Making the cross validation set

Cross Validation decisions:
- It looks like I only have 10 years right now, and if the results are good I can keep it that way (justify by independent years)
- Training set of 80% and Validation of 20% is fine, makes sense to make the Validation years adjacent instead of random, probably doesn't matter much but adjacent minimises theyre connection with the years in the training dataset
- This means theres only 5 folds which shouldn't take forever to do 
- There's an issue right now where my validation set is also my test set, how much can I get around this?
- I could test a 70-20-10 set up, from the looks of it there won't be that much loss in performance by reducing the training set by 12%? 
- If I assume the years are independent then it doesn't matter which dates I choose for validation years when I've got a specific testing year
- K -fold cross validation means splitting the data in k chunks and choosing a different chunk for each, p-fold involves choosing all possible combinations of size p for the splits

Structure of the folders:
- Can do Validation_Models/Val_Years/Model/.pth, bs Model/Val_Years/.pth
- I think the first makes more sense, I would realy want to ompare models trained over the same years


Restructuring Current code:
- I want to fit this whole thing into a for loop so I can run it
- Alternatively I can have the validation years as a parameter in the config_space and just let the code run as is
- It would be nice to make the prep section smaller visually, or hidden somewhere else


In [2]:
import sys

def get_env():
    sp = sys.path[1].split("/")
    if "envs" in sp:
        return sp[sp.index("envs") + 1]
    else:
        return ""
    
print(get_env())

Hydra_Code


In [3]:
basins = ['libby_reservoir_inflow',  'owyhee_r_bl_owyhee_dam',  'san_joaquin_river_millerton_reservoir',  'taylor_park_reservoir_inflow',
 'boise_r_nr_boise', 'green_r_bl_howard_a_hanson_dam', 'weber_r_nr_oakley', 'detroit_lake_inflow', 'virgin_r_at_virtin', 'dillon_reservoir_inflow',
 'pueblo_reservoir_inflow', 'hungry_horse_reservoir_inflow', 'stehekin_r_at_stehekin', 'pecos_r_nr_pecos', 'snake_r_nr_heise', 'yampa_r_nr_maybell',
 'colville_r_at_kettle_falls', 'missouri_r_at_toston', 'merced_river_yosemite_at_pohono_bridge', 'animas_r_at_durango','fontenelle_reservoir_inflow', 'boysen_reservoir_inflow']

selected_years = range(2000,2024,2)


base_dir = "/data/Hydra_Work/Scaled_Data"

# Define dictionaries and DataFrames
dictionaries = ['era5', 'seasonal_forecasts', 'daily_flow', 'climatological_flows']

dataframes = ['climate_indices', 'static_variables']

# Function to load dictionaries
def load_dictionaries(base_dir, names):
    loaded_dicts = {}
    for name in names:
        file_path = os.path.join(base_dir, f"{name}.pkl")
        with open(file_path, 'rb') as file:
            locals()[name] = pickle.load(file)
    return locals()

# Function to load DataFrames
def load_dataframes(base_dir, names):
    loaded_dfs = {}
    for name in names:
        file_path = os.path.join(base_dir, f"{name}.pkl")
        locals()[name] = pd.read_pickle(file_path)
    return locals()

saved_dicts = load_dictionaries(base_dir, dictionaries)
saved_dfs = load_dataframes(base_dir, dataframes)

for name in dictionaries:
    locals()[name] = saved_dicts[name]

for name in dataframes:
    locals()[name] = saved_dfs[name]

criterion = SumPinballLoss(quantiles = [0.1, 0.5, 0.9])

basin = 'animas_r_at_durango' 
All_Dates = daily_flow[basin].index[
    ((daily_flow[basin].index.month < 6) | ((daily_flow[basin].index.month == 6) & (daily_flow[basin].index.day < 24))) &
    ((daily_flow[basin].index.year % 2 == 0) | ((daily_flow[basin].index.month > 10) | ((daily_flow[basin].index.month == 10) & (daily_flow[basin].index.day >= 1))))
]
All_Dates = All_Dates[All_Dates.year > 1998]


# Validation Year
# Val_Dates = All_Dates[All_Dates.year >= 2020]
# All_Dates = All_Dates[All_Dates.year < 2020]

device = torch.device('cuda' if torch.cuda.
                is_available() else 'cpu')

criterion = SumPinballLoss(quantiles = [0.1, 0.5, 0.9])

basin = 'animas_r_at_durango' 
All_Dates = daily_flow[basin].index[
    ((daily_flow[basin].index.month < 6) | ((daily_flow[basin].index.month == 6) & (daily_flow[basin].index.day < 24))) &
    ((daily_flow[basin].index.year % 2 == 0) | ((daily_flow[basin].index.month > 10) | ((daily_flow[basin].index.month == 10) & (daily_flow[basin].index.day >= 1))))
]
All_Dates = All_Dates[All_Dates.year > 1998]


# Validation Year
# Val_Dates = All_Dates[All_Dates.year >= 2018]
# Val_Dates = Val_Dates[Val_Dates.year <= 2022]
# Train_Dates = All_Dates[All_Dates.year == 2022]

seed = 42 ; torch.manual_seed(seed) ; random.seed(seed) ; np.random.seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

days  = 90




# Tuning individual basins

In [4]:
LR = 1e-3
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 0 #3 # THis is about climatology, not climate indices
History_Statistics_in_forcings = 0  #5*2

forecast_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3
hindcast_input_size = 9 # 17 if we include climate indices



In [15]:
Retrain_Basins = basins
for basin in basins:
    loss_path = f'/data/Hydra_Work/Tuning/Week_Ahead_Models_V2/Specific_Week_Ahead_Models/{basin}_specific_loss.txt'
    
    with open(loss_path, 'r') as file:
    # Read the entire contents of the file
        Overall_Best_Val_Loss = float(file.read())
    
    if Overall_Best_Val_Loss < -0.05:
        Retrain_Basins = list(set(Retrain_Basins) - set([basin]))

In [51]:
# Do we want hindcast and forecast num-layers to be different?
def define_models(hindcast_input_size, forecast_input_size, hidden_size, num_layers, dropout, bidirectional, learning_rate, copies = 3, forecast_output_size = 3, device = device):
    models = {}
    params_to_optimize = {}
    optimizers = {}
    schedulers = {}
    
    hindcast_output_size = forecast_output_size
    for copy in range(copies):
        models[copy] = Google_Model_Block(hindcast_input_size, forecast_input_size, hindcast_output_size, forecast_output_size, hidden_size, num_layers, device, dropout, bidirectional)
        
        models[copy].to(device)
        params_to_optimize[copy] = list(models[copy].parameters())
        # Probably should be doing 1e-2 and 10
        optimizers[copy] = torch.optim.Adam(params_to_optimize[copy], lr= learning_rate, weight_decay = 1e-4)
        schedulers[copy] = lr_scheduler.CosineAnnealingLR(optimizers[copy], T_max = 100000,)
        #.StepLR(optimizers[copy], 5, gamma=0.5)
        

    return models, params_to_optimize, optimizers, schedulers


In [50]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper
from ray.tune.search.optuna import OptunaSearch
import optuna

# Fixed parameters
total_epochs = 200
n_epochs = 1  # Epochs between tests
group_lengths = [7] #np.arange(180) 7 Day ahead for streamlined version
batch_size = 256
copies = 1

# parameters to tune
hidden_sizes = [128] # 64 converged upon
num_layers =  [1]
dropout = [0.1]
bidirectional = [False] #[True, False]
learning_rate = [1e-1, 1e-2] #[1e-3, 1e-5]


# Set up configuration space
config_space = {

    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate),
    "basin":  tune.grid_search(basins),
    'test_year': tune.grid_search(list(np.arange(2000,2024,2)) )

}




In [8]:
def train_model(config):

    All_Dates = ray.get(All_Dates_id)  

    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]  ]
    train_years = [year for year in years if year not in [test_year] + val_years]
    
    Test_Dates = All_Dates[All_Dates.year == test_year]
    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]

    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    val_loss = 1000

    basin = config["basin"]

    save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/Specific_LSTM_Model/{basin}_specific.pth'
    loss_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/Specific_LSTM_Model/{basin}_specific_loss.txt'

    
    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)
    
    copies = 1
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
    
    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)

    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

             # Need to fix the outputs of No_Body_Model_Run
            train_losses[copy], Climate_Loss = No_Body_Model_Run(All_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=True)
            epoch_val_losses[copy], Climate_Loss = No_Body_Model_Run(Val_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=True)

        loss = np.mean(list(train_losses.values())) - Climate_Loss
        

        candidate_val_loss = ((np.mean( list(epoch_val_losses.values()) ).mean() - Climate_Loss)[0])/np.mean(Climate_Loss)
        val_loss = np.min([val_loss, candidate_val_loss ])
        if candidate_val_loss == val_loss:
             torch.save(models[0], save_path)
             
        
        # Check best loss so far for this model
        with open(loss_path, 'r') as file:
            # Read the entire contents of the file
            Overall_Best_Val_Loss = float(file.read())

        if val_loss < Overall_Best_Val_Loss:
            torch.save(models[0], save_path)

            with open(loss_path, 'w') as f:
                f.write('%f' % val_loss)


        ray.train.report({'val_loss' : val_loss})
        #print(candidate_val_loss)
        losses.append(loss)
        val_losses.append(val_loss)


    return val_loss

    


In [9]:
from ray import train, tune


ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)   
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(static_variables)


2024-06-04 19:05:46,123	INFO worker.py:1724 -- Started a local Ray instance.


In [10]:
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='val_loss',
    mode='min',
    max_t=100,
    grace_period=20,
    reduction_factor=2,
    brackets=1,
)


plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 20,
    grace_period=50,
    mode="min",
)


In [11]:
# Stehekin gives :True	0.4	64	0.001	3 Even looking at overall min, and for animas r at durango
# T-tests suggests: Bidirectional good, dropout unimportant, 16 bad, 64 vs 128 unimportant. All models that imrpvoed loss wre bidirectional
# Libby seemed to want an single layer
# San Joaqin is just hard, score of 9.4: {'hidden_size': 64, 'num_layers': 1, 'dropout': 0.4, 'bidirectional': False, 'learning_rate': 1e-05}


runs = 12
# At weekly:
# Animas has {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.1, 'bidirectional': False, 'learning_rate': 1e-05}, 64,3,0.1. Results for 64, 1, 0.1, True identical
def objective(config):   

    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    
    #print('Device available is', device)
    

    score = train_model(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}

#basin = 'stehekin_r_at_stehekin'

#, search_alg = optuna_search
optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
tune_config = tune.TuneConfig(scheduler=asha_scheduler)
running_tune_config = tune.TuneConfig()

run_config=train.RunConfig(stop= plateau_stopper)

# Note using < 1gb per run stops pylance from crashing I think
# Without Optun
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/runs, "gpu": 1/runs}), param_space=config_space, tune_config = tune_config, run_config = run_config) 
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config) 

results = tuner.fit()
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)



# Define the file path where you want to save the best configuration
file_path = f"/data/Hydra_Work/Tuning/Config_Text/{basin}_best_config.txt"
# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)


0,1
Current time:,2024-06-05 11:57:04
Running for:,16:51:15.83
Memory:,30.3/125.9 GiB

Trial name,status,loc,basin,bidirectional,dropout,hidden_size,learning_rate,num_layers,test_year,iter,total time (s),val_loss
objective_73fcb_00000,TERMINATED,136.156.133.98:951779,libby_reservoir_b5a0,False,0.1,128,0.1,1,2000,20,785.683,1.53938
objective_73fcb_00001,TERMINATED,136.156.133.98:951846,owyhee_r_bl_owy_d480,False,0.1,128,0.1,1,2000,50,1833.36,0.250933
objective_73fcb_00002,TERMINATED,136.156.133.98:951915,san_joaquin_riv_4750,False,0.1,128,0.1,1,2000,64,2518.9,-0.0138664
objective_73fcb_00003,TERMINATED,136.156.133.98:951948,taylor_park_res_ca80,False,0.1,128,0.1,1,2000,20,792.186,2.18262
objective_73fcb_00004,TERMINATED,136.156.133.98:951956,boise_r_nr_boise,False,0.1,128,0.1,1,2000,40,1578.87,0.591562
objective_73fcb_00005,TERMINATED,136.156.133.98:951978,green_r_bl_howa_e560,False,0.1,128,0.1,1,2000,50,1955.47,0.0406169
objective_73fcb_00006,TERMINATED,136.156.133.98:951980,weber_r_nr_oakley,False,0.1,128,0.1,1,2000,20,741.973,1.99107
objective_73fcb_00007,TERMINATED,136.156.133.98:951986,detroit_lake_inflow,False,0.1,128,0.1,1,2000,50,1833.96,0.0782084
objective_73fcb_00008,TERMINATED,136.156.133.98:951995,virgin_r_at_virtin,False,0.1,128,0.1,1,2000,40,1577.42,0.331732
objective_73fcb_00009,TERMINATED,136.156.133.98:951996,dillon_reservoi_b320,False,0.1,128,0.1,1,2000,20,792.279,1.70859




2024-06-04 19:18:14,588	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}
2024-06-04 19:18:21,827	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}
2024-06-04 19:18:58,011	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}
2024-06-04 19:18:59,724	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}
2024-06-04 19:19:04,562	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}
2024-06-04 19:19:04,870	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}
2024-06-04

{'hidden_size': 128, 'num_layers': 1, 'dropout': 0.1, 'bidirectional': False, 'learning_rate': 0.01, 'basin': 'boysen_reservoir_inflow', 'test_year': 2006}
Best configuration saved to: /data/Hydra_Work/Tuning/Config_Text/boysen_reservoir_inflow_best_config.txt


In [16]:
results_df = results.get_dataframe()
results_df[results_df['val_loss'] < -0.1][['val_loss', 'config/basin', 'config/test_year']]

Unnamed: 0,val_loss,config/basin,config/test_year
35,-0.171296,pecos_r_nr_pecos,2000
40,-0.142887,merced_river_yosemite_at_pohono_bridge,2000
42,-0.295092,fontenelle_reservoir_inflow,2000
67,-0.230011,owyhee_r_bl_owyhee_dam,2002
79,-0.517411,pecos_r_nr_pecos,2002
...,...,...,...
513,-0.274982,detroit_lake_inflow,2022
517,-0.157572,hungry_horse_reservoir_inflow,2022
519,-0.198643,pecos_r_nr_pecos,2022
524,-0.367187,merced_river_yosemite_at_pohono_bridge,2022


In [None]:
Safe_Basins = list(results_df[results_df['val_loss'] < -0.05]['config/basin'].values)
Retrain_Basins = list(set(basins) - set(Safe_Basins))
Retrain_Basins

In [None]:
from scipy import stats

results_df = results.get_dataframe()
columns_to_drop = ['timestamp', 'checkpoint_dir_name', 'done', 'training_iteration', 
                   'trial_id', 'date', 'time_this_iter_s', 'time_total_s', 'pid', 
                   'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore']

# Drop the columns
results_df.drop(columns=columns_to_drop, inplace=True)

val_loss_bidirectional_true = results_df[results_df['config/num_layers'] == 3]['val_loss']
val_loss_bidirectional_false = results_df[results_df['config/num_layers'] == 1]['val_loss']

# Perform a t-test
t_statistic, p_value = stats.ttest_ind(val_loss_bidirectional_true, val_loss_bidirectional_false)

# Print the results
print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

# Check if the difference in means is statistically significant
alpha = 0.05  # Significance level
if p_value < alpha:
    print("The difference in mean val_loss is statistically significant.")
else:
    print("The difference in mean val_loss is not statistically significant.")

In [None]:
# Loading models
Tuned_Models = {}
for basin in basins:
    Tuned_Models[basin] = torch.load(f'/data/Hydra_Work/Post_Rodeo_Work/Tuned_Single_Models/basin.pth')


# Tuning General Model

In [52]:
LR = 1e-3
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 0 #3
History_Statistics_in_forcings = 0 #5*2

forecast_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3
hindcast_input_size = 8 #8 for no flow

In [59]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters
total_epochs = 200
n_epochs = 1 # Epochs between tests
group_lengths = [7] # 
batch_size = 64
copies = 1

# parameters to tune
# I tuned to 128,2,0.4,False,1e-3 
hidden_sizes = [128]
num_layers = [1]
dropout = [0.1]
bidirectional =  [False]
learning_rate = [1e-3]

config_space = {
    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate),
    'test_year': tune.grid_search(list(np.arange(2022,2024,2)) )
    
}


# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/'

In [60]:
    years = list(np.arange(2000,2024,2))
    test_year = 2000
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]  ]
    train_years = [year for year in years if year not in [test_year] + val_years]
    
    Test_Dates = All_Dates[All_Dates.year == test_year]
    Val_Dates = [date for date in All_Dates if date.year in val_years]
    Train_Dates = [date for date in All_Dates if date.year in train_years]


In [61]:
def train_model_general(config):
    
    All_Dates = ray.get(All_Dates_id)  
    
    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]  ]
    train_years = [year for year in years if year not in [test_year] + val_years]
    
    Test_Dates = All_Dates[All_Dates.year == test_year]
    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]
    
    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    copies = 1
    
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
    
    save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_LSTM_No_Flow_Model/General_LSTM.pth'
    loss_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_LSTM_No_Flow_Model/General_LSTM_loss.txt'

    val_loss = 1000
    
    
    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)
    
  
    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)

    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

             # Need to fix the outputs of No_Body_Model_Run
            train_losses[copy], Climate_Loss = No_Body_Model_Run(Train_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy], Climate_Loss = No_Body_Model_Run(Val_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values())) - Climate_Loss


        candidate_val_loss = ((np.mean(list(epoch_val_losses.values())).mean() - Climate_Loss)[0])/np.mean(Climate_Loss)
        val_loss = np.min([val_loss, candidate_val_loss ])
        
        # Check best loss so far for this model
        with open(loss_path, 'r') as file:
            # Read the entire contents of the file
            Overall_Best_Val_Loss = float(file.read())

        if val_loss < Overall_Best_Val_Loss:
            torch.save(models[0], save_path)

            with open(loss_path, 'w') as f:
                f.write('%f' % val_loss)

            
               
        ray.train.report({'val_loss' : val_loss})

        losses.append(loss)
        val_losses.append(candidate_val_loss)
        #print(val_losses)
        #print(candidate_val_loss)
    return val_loss


In [62]:
from ray import train, tune



ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)  
Val_Dates_id = ray.put(Val_Dates)  
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(static_variables)

2024-06-06 11:18:14,293	INFO worker.py:1724 -- Started a local Ray instance.


In [63]:
# asha_scheduler = ASHAScheduler(
#     time_attr='training_iteration',
#     metric='val_loss',
#     mode='min',
#     max_t=100,
#     grace_period=20,
#     reduction_factor=2,
#     brackets=1,
# )


plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 300,
    grace_period=80,
    mode="min",
)


In [64]:
# {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.1, 'bidirectional': True, 'learning_rate': 0.001}
# 7 Days:  128	2	0.4	False	0.001
def objective(config):  
    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    
    #print('Device available is', device)
    

    score = train_model_general(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


#, search_alg = optuna_search
# optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
# tune_config = tune.TuneConfig(scheduler=asha_scheduler)
run_config=train.RunConfig(stop= plateau_stopper)

runs = 3
# Without Optuna
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/runs , "gpu": 1/runs }), param_space=config_space, run_config = run_config) 
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config) 

results = tuner.fit()
# try get_best_checkpoint, or change val to be maximum of current val_loss and previous ones
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)
file_path = f"/data/Hydra_Work/Tuning/Config_Text/General_Model_best_config.txt"

# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)


0,1
Current time:,2024-06-06 11:54:18
Running for:,00:36:02.36
Memory:,38.4/125.9 GiB

Trial name,status,loc,bidirectional,dropout,hidden_size,learning_rate,num_layers,test_year,iter,total time (s),val_loss
objective_78318_00000,RUNNING,136.156.133.98:1011212,False,0.1,128,0.01,1,2022,189,2149.45,0.358954
objective_78318_00002,RUNNING,136.156.133.98:1011214,False,0.1,128,0.0001,1,2022,189,2151.44,0.0858477
objective_78318_00001,TERMINATED,136.156.133.98:1011213,False,0.1,128,0.001,1,2022,160,1852.67,0.0767424




2024-06-06 11:49:12,313	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}


In [None]:
results_df = results.get_dataframe()
results_df[results_df['val_loss'] < -0.15] 

In [None]:
General_Model = torch.load('/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/General_model.pth')



# Tuning Hydra Model

In [None]:
def define_models_hydra(body_hindcast_input_size, body_forecast_input_size, body_output_size, body_hidden_size, body_num_layers, body_dropout,
                        head_hidden_size, head_num_layers, head_forecast_output_size, head_dropout, bidirectional, basins,
                        learning_rate_general_head, learning_rate_head, learning_rate_body, LR = 1e-3, 
                        additional_specific_head_hindcast_input_size = 1, additional_specific_head_forecast_input_size = 0,
                        copies=1, device=None):
    Hydra_Bodys = {}
    Basin_Heads = {}
    General_Heads = {}   
    general_optimizers = {}
    optimizers = {}
    schedulers = {}
    
    body_forecast_output_size = body_output_size
    body_hindcast_output_size = body_output_size
    
    # Define head hindcast size as head-forecast for simplicty
    head_hindcast_output_size = head_forecast_output_size
    specific_head_hindcast_output_size = head_forecast_output_size
    specific_head_forecast_output_size = head_forecast_output_size
    specific_head_hidden_size = head_hidden_size
    specific_head_num_layers = head_num_layers
    
    # Head takes Body as inputs
    #head_hindcast_input_size = body_hindcast_input_size 
    head_hindcast_input_size = body_hindcast_output_size
    head_forecast_input_size = body_forecast_output_size
    
    # Specific input size
    specific_head_hindcast_input_size = head_hindcast_input_size + additional_specific_head_hindcast_input_size
    specific_head_forecast_input_size = head_forecast_input_size + additional_specific_head_forecast_input_size
    
    for copy in range(copies):
        Hydra_Bodys[copy] = Google_Model_Block(body_hindcast_input_size, body_forecast_input_size, body_hindcast_output_size, body_forecast_output_size, body_hidden_size, body_num_layers, device, body_dropout, bidirectional)
        General_Heads[copy] = Google_Model_Block(head_hindcast_input_size, head_forecast_input_size, head_hindcast_output_size, head_forecast_output_size, head_hidden_size, head_num_layers, device, head_dropout, bidirectional)
        Basin_Heads[copy] = Specific_Heads(basins, specific_head_hindcast_input_size, specific_head_forecast_input_size, specific_head_hindcast_output_size, specific_head_forecast_output_size, specific_head_hidden_size, specific_head_num_layers, device, head_dropout, bidirectional)


        specific_head_parameters = list()
        for basin, model in Basin_Heads[copy].items():
            specific_head_parameters += list(model.parameters())

        optimizers[copy] = torch.optim.Adam(
        # Extra LR is the global learning rate, not really important
        [
            {"params": General_Heads[copy].parameters(), "lr": learning_rate_general_head},
            {"params": specific_head_parameters, "lr": learning_rate_head},
            {"params": Hydra_Bodys[copy].parameters(), "lr": learning_rate_body},
        ],
        lr=LR, weight_decay = 1e-4) #1e-4 good so far, 3 not so food

        general_optimizers[copy] = torch.optim.Adam(
        # Extra LR is the global learning rate, not really important
        [
            {"params": General_Heads[copy].parameters(), "lr": learning_rate_general_head},
            {"params": Hydra_Bodys[copy].parameters(), "lr": learning_rate_body},
        ],
        lr=LR, )
        schedulers[copy] = lr_scheduler.StepLR(optimizers[copy], 1, gamma=0.99) #.CosineAnnealingLR(optimizers[copy], T_max= 100000, eta_min= 1e-4,)
         #
        
    return Hydra_Bodys, General_Heads, Basin_Heads, optimizers, schedulers, general_optimizers 

In [None]:
LR = 1e-3
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 0 #3
History_Statistics_in_forcings = 0 # 5*2

forecast_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3
body_hindcast_input_size = 8
body_forecast_input_size = forecast_input_size


Overall_Best_Val_Loss = 999

In [None]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters
total_epochs = 300
n_epochs = 1 # Epochs between tests
group_lengths = [7] #np.arange(180)

copies = 1
head_output_size = 3

# parameters to tune
# chose 128, 2, 0.1, 1e-3, 6, 32, 1, 0.4, 1e-3
body_hidden_sizes =  [128]
body_num_layers = [1]
body_dropouts = [0.0] #[0.1, 0.4]
body_learning_rates = [1e-4] 
body_outputs = [4, 6] # Say hindcast and forecasts have same outputrs body_hindcast_output_size


head_hidden_sizes = [32]
head_num_layers = [1]
head_dropouts = [0.0] #[0.1, 0.4, 0.7]
head_learning_rates = [1e-2, 1e-4]
batch_size = [256]
LR = 1e-3
bidirectionals = [False]
spec_multiplier = [1]

config_space = {
    "body_hidden_size": tune.grid_search(body_hidden_sizes),
    "body_num_layer": tune.grid_search(body_num_layers),
    "body_dropout": tune.grid_search(body_dropouts),
    "bidirectional": tune.grid_search(bidirectionals),
    "body_output": tune.grid_search(body_outputs),
    "body_learning_rate": tune.grid_search(body_learning_rates),
    "head_hidden_size": tune.grid_search(head_hidden_sizes),
    "head_num_layer": tune.grid_search(head_num_layers),
    "head_dropout": tune.grid_search(head_dropouts),
    "head_learning_rate": tune.grid_search(head_learning_rates),
    "spec_multiplier": tune.grid_search(spec_multiplier),
    'batch_size': tune.grid_search(batch_size),
    'test_year': tune.grid_search([2010] ) #list(np.arange(2000,2024,2))

    #"general_head_learning_rate": tune.grid_search(head_learning_rates),
}

# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_Hydra_Model/'



In [None]:
def train_model_hydra(config):

    All_Dates = ray.get(All_Dates_id)  
    
    
    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]  ]
    train_years = [year for year in years if year not in [test_year] + val_years]
    
    Test_Dates = All_Dates[All_Dates.year == test_year]
    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]

    
    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)  
  
                        
    body_save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_Body_LSTM.pth'
    head_save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_Head_LSTM.pth'
    basin_heads_save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/Basin_Head_Model'
    
    loss_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_LSTM_loss.txt'

    val_loss = 1000
    
    
    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)


    copies = 1
    warmup = 4
    best_val_loss = 999
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
   

    general_head_learning_rate = config['body_learning_rate']
    Hydra_Bodys, General_Hydra_Heads, model_heads, optimizers, schedulers, general_optimizers  = define_models_hydra(body_hindcast_input_size, body_forecast_input_size, config['body_output'],
                                config['body_hidden_size'], config['body_num_layer'], config['body_dropout'], 
                                config['head_hidden_size'], config['head_num_layer'], 3, config['head_dropout'], config['bidirectional'], basins,
                                general_head_learning_rate, config['head_learning_rate'], config['body_learning_rate'], LR, device = device
                                )
     

    batch_size = config['batch_size']
                                                
    general_losses, specific_losses, general_val_losses, specific_val_losses, val_losses = [], [], [], [], []

    # Initialise, with dummy scheduler
    for copy in range(copies):
        # Initialise
        dummy_scheduler = lr_scheduler.StepLR(optimizers[copy],step_size = warmup, gamma = 0.8)

        Model_Run(All_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
            Static_variables, general_optimizers[copy], dummy_scheduler, criterion, early_stopper= None, n_epochs= warmup,
            batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, feed_forcing = False)

            
    for epoch in range(total_epochs):
        train_general_losses = {}
        train_specific_losses = {}
        train_climate_losses = {}
        epoch_val_general_losses = {}
        epoch_val_specific_losses = {}
        climate_losses = {}
        
        for copy in range(copies):
                        

            # Full Training
            train_general_losses[copy], train_specific_losses[copy], train_climate_losses[copy] = Model_Run(Train_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs= n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, feed_forcing = False, spec_multiplier = config["spec_multiplier"])
            epoch_val_general_losses[copy], epoch_val_specific_losses[copy], climate_losses[copy] = Model_Run(Val_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs= n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, feed_forcing = False)

        general_loss = np.mean(list(train_general_losses.values()))
        specific_loss = np.mean(list(train_specific_losses.values()))
        train_climate_loss = np.mean(list(train_climate_losses.values()))
        climate_loss = np.mean(list(climate_losses.values()))
        
        epoch_val_general_loss = np.mean(list(epoch_val_general_losses.values())).mean()
        epoch_val_specific_loss = np.mean(list(epoch_val_specific_losses.values())).mean()
        
        
        general_losses.append(general_loss)
        specific_losses.append(specific_loss)
        specific_val_losses.append(epoch_val_specific_loss)
        general_val_losses.append(epoch_val_general_loss)

        val_loss = 0.5*(epoch_val_general_loss + epoch_val_specific_loss)
        candidate_val_loss = ((val_loss.mean() - climate_loss))/np.mean(climate_loss)
        best_val_loss = np.min([best_val_loss, candidate_val_loss ])
         
        with open(loss_path, 'r') as file:
            # Read the entire contents of the file
            Overall_Best_Val_Loss = float(file.read())

        if best_val_loss < Overall_Best_Val_Loss:
            with open(loss_path, 'w') as f:
                f.write('%f' % best_val_loss)

            torch.save(Hydra_Bodys[0], body_save_path)
            torch.save(General_Hydra_Heads[0], head_save_path)
            for basin in basins:
                torch.save(model_heads[0][basin], f"{basin_heads_save_path}/{basin}.path")
                
            
               
        ray.train.report({'val_loss' : best_val_loss})
        #print('Validation Loss', val_losses)
        #print('Training Loss', general_losses/train_climate_loss )
        print(candidate_val_loss)
        val_losses.append(candidate_val_loss)

    return best_val_loss



In [None]:
from ray import train, tune


ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)  
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(static_variables)


2024-06-06 07:33:55,008	INFO worker.py:1724 -- Started a local Ray instance.


In [None]:
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='val_loss',
    mode='min',
    max_t=100,
    grace_period=20,
    reduction_factor=3,
    brackets=1,
)


plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 800,
    grace_period=20,
    mode="min",
)


In [None]:
runs_per_iteration = 4
def objective(config):  
    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    

    score = train_model_hydra(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


run_config=train.RunConfig(stop= plateau_stopper)
# Can use fractions of GPU
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/runs_per_iteration, "gpu": 1/(runs_per_iteration)}), param_space=config_space, run_config = run_config) 

results = tuner.fit()
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)
file_path = f"/data/Hydra_Work/Tuning/Config_Text/Hydral_Model_best_config.txt"

# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)

0,1
Current time:,2024-06-06 07:37:58
Running for:,00:03:53.43
Memory:,42.3/125.9 GiB

Trial name,status,loc,batch_size,bidirectional,body_dropout,body_hidden_size,body_learning_rate,body_num_layer,body_output,head_dropout,head_hidden_size,head_learning_rate,head_num_layer,spec_multiplier,test_year,iter,total time (s),val_loss
objective_26ce5_00000,RUNNING,136.156.133.98:996294,256,False,0,128,0.0001,1,4,0,32,0.1,1,1,2010,15,224.87,0.20507
objective_26ce5_00001,RUNNING,136.156.133.98:996295,256,False,0,128,0.0001,1,4,0,32,0.01,1,1,2010,15,224.531,0.255037
objective_26ce5_00002,RUNNING,136.156.133.98:996296,256,False,0,128,0.0001,1,4,0,32,0.0001,1,1,2010,15,224.891,0.212736




[36m(objective pid=996294)[0m 0.9528351140947744
[36m(objective pid=996294)[0m 2.7010988372894116[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 0.20506976474633834[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 0.4268740338164566[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 1.8464422703766754[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 0.5746116993298312[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 0.8285865921395753[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 1.0315079504550688[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 0.21510097065456504[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 2.829953433962464[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 1.3664495013943334[32m [repeated 3x across cluster][0m
[36m(objective pid=996294)[0m 1.3245506511754634[32m [r

2024-06-06 07:38:04,734	INFO tune.py:1042 -- Total run time: 239.97 seconds (233.43 seconds for the tuning loop).
Resume experiment with: Tuner.restore(path="/home/gbmc/ray_results/objective_2024-06-06_07-34-04", trainable=...)


{'body_hidden_size': 128, 'body_num_layer': 1, 'body_dropout': 0.0, 'bidirectional': False, 'body_output': 4, 'body_learning_rate': 0.0001, 'head_hidden_size': 32, 'head_num_layer': 1, 'head_dropout': 0.0, 'head_learning_rate': 0.1, 'spec_multiplier': 1, 'batch_size': 256, 'test_year': 2010}
Best configuration saved to: /data/Hydra_Work/Tuning/Config_Text/Hydral_Model_best_config.txt


In [161]:
results_df = results.get_dataframe()

In [33]:
results_df[results_df['val_loss'] < -0.1]#[['val_loss', 'config/body_output']]

KeyError: 'val_loss'