In [1]:
import os
import pickle
import joblib
import pandas as pd
import numpy as np
import random
import itertools

import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

import sys
sys.path.append('/data/Hydra_Work/Competition_Functions') 
from Processing_Functions import process_forecast_date, process_seasonal_forecasts
from Data_Transforming import read_nested_csvs, generate_daily_flow, use_USGS_flow_data, USGS_to_daily_df_yearly

sys.path.append('/data/Hydra_Work/Pipeline_Functions')
from Folder_Work import filter_rows_by_year, csv_dictionary, add_day_of_year_column

sys.path.append('/data/Hydra_Work/Post_Rodeo_Work/ML_Functions.py')
from Full_LSTM_ML_Functions import Specific_Heads, Google_Model_Block, SumPinballLoss, EarlyStopper, Model_Run, No_Body_Model_Run, Indicator_LSTM_Run, define_models



from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim.lr_scheduler as lr_scheduler





# Making the cross validation set

Cross Validation decisions:
- It looks like I only have 10 years right now, and if the results are good I can keep it that way (justify by independent years)
- A training set of 80% and a validation set of 20% is fine. It makes sense to make the validation years adjacent instead of random; it probably doesn't matter much, but adjacent years minimise their connection with the years in the training dataset
- This means there are only 5 folds, which shouldn't take forever to do
- There's an issue right now where my validation set is also my test set, how much can I get around this?
- I could test a 10-20-10 set up, from the looks of it there won't be that much loss in performance by reducing the training set by 12%? 
- If I assume the years are independent then it doesn't matter which dates I choose for validation years when I've got a specific testing year
- K-fold cross-validation means splitting the data into k chunks and holding out a different chunk in each fold; leave-p-out ("p-fold") cross-validation instead uses every possible combination of size p as the held-out split

Structure of the folders:
- Can do Validation_Models/Val_Years/Model/.pth, vs Model/Val_Years/.pth
- I think the first makes more sense; I would really want to compare models trained over the same years


Restructuring Current code:
- I want to fit this whole thing into a for loop so I can run it
- Alternatively I can have the validation years as a parameter in the config_space and just let the code run as is
- It would be nice to make the prep section smaller visually, or hidden somewhere else


In [2]:
# Site identifiers of every basin handled by this notebook.
basins = [
    'libby_reservoir_inflow',
    'owyhee_r_bl_owyhee_dam',
    'san_joaquin_river_millerton_reservoir',
    'taylor_park_reservoir_inflow',
    'boise_r_nr_boise',
    'green_r_bl_howard_a_hanson_dam',
    'weber_r_nr_oakley',
    'detroit_lake_inflow',
    'virgin_r_at_virtin',
    'dillon_reservoir_inflow',
    'pueblo_reservoir_inflow',
    'hungry_horse_reservoir_inflow',
    'stehekin_r_at_stehekin',
    'pecos_r_nr_pecos',
    'snake_r_nr_heise',
    'yampa_r_nr_maybell',
    'colville_r_at_kettle_falls',
    'missouri_r_at_toston',
    'merced_river_yosemite_at_pohono_bridge',
    'animas_r_at_durango',
    'fontenelle_reservoir_inflow',
    'boysen_reservoir_inflow',
]

# Every second year from 2000 up to and including 2022.
selected_years = range(2000, 2024, 2)

# Folder holding the pickled inputs.
base_dir = "/data/Hydra_Work/Scaled_Data"

# Base names (without ".pkl") of the pickles read with `pickle.load` ...
dictionaries = [
    'era5',
    'seasonal_forecasts',
    'daily_flow',
    'climatological_flows',
]

# ... and of the pickles read with `pd.read_pickle`.
dataframes = [
    'climate_indices',
    'clustering_static_variables',
]

# Function to load dictionaries
def load_dictionaries(base_dir, names):
    """Load one pickled object per name from ``base_dir``.

    Args:
        base_dir: Directory containing the ``<name>.pkl`` files.
        names: Iterable of base names (without the ``.pkl`` suffix).

    Returns:
        dict mapping each name to the object unpickled from
        ``<base_dir>/<name>.pkl``. Empty when ``names`` is empty.

    Raises:
        FileNotFoundError: if one of the pickle files does not exist.
    """
    # Collect results in an explicit dict. Writing through `locals()` inside a
    # function is not guaranteed to persist (and does not on Python 3.13+),
    # and returning `locals()` also leaked unrelated names such as `file`.
    loaded_dicts = {}
    for name in names:
        file_path = os.path.join(base_dir, f"{name}.pkl")
        # NOTE: unpickling can execute arbitrary code - only use on trusted files.
        with open(file_path, 'rb') as file:
            loaded_dicts[name] = pickle.load(file)
    return loaded_dicts

# Function to load DataFrames
def load_dataframes(base_dir, names):
    """Load one pickled pandas object per name from ``base_dir``.

    Args:
        base_dir: Directory containing the ``<name>.pkl`` files.
        names: Iterable of base names (without the ``.pkl`` suffix).

    Returns:
        dict mapping each name to the result of ``pd.read_pickle`` on
        ``<base_dir>/<name>.pkl``. Empty when ``names`` is empty.
    """
    # Use an explicit result dict rather than mutating/returning `locals()`,
    # which is not reliable inside a function (and fails on Python 3.13+).
    loaded_dfs = {}
    for name in names:
        file_path = os.path.join(base_dir, f"{name}.pkl")
        loaded_dfs[name] = pd.read_pickle(file_path)
    return loaded_dfs

# Read every pickled input once.
saved_dicts = load_dictionaries(base_dir, dictionaries)
saved_dfs = load_dataframes(base_dir, dataframes)

# Bind the loaded objects to explicit module-level names (instead of injecting
# them through `locals()`), so readers and linters can see where each name
# comes from. The keys are the entries of `dictionaries` / `dataframes`.
era5 = saved_dicts['era5']
seasonal_forecasts = saved_dicts['seasonal_forecasts']
daily_flow = saved_dicts['daily_flow']
climatological_flows = saved_dicts['climatological_flows']

climate_indices = saved_dfs['climate_indices']
clustering_static_variables = saved_dfs['clustering_static_variables']

# Quantile (pinball) loss summed over the 10th, 50th and 90th percentiles.
criterion = SumPinballLoss(quantiles = [0.1, 0.5, 0.9])

# Build the candidate dates from one reference basin's flow index.
basin = 'animas_r_at_durango'
flow_index = daily_flow[basin].index
# 1 January up to (and excluding) 24 June.
before_jun_24 = (flow_index.month < 6) | ((flow_index.month == 6) & (flow_index.day < 24))
# NOTE(review): `before_jun_24` already restricts months to <= 6, so the
# "October onwards" alternative below can never be satisfied once the two masks
# are combined; in effect only even years survive. Confirm whether Oct-Dec
# dates of odd years were meant to be included.
even_year_or_oct_onwards = (flow_index.year % 2 == 0) | (
    (flow_index.month > 10) | ((flow_index.month == 10) & (flow_index.day >= 1))
)
All_Dates = flow_index[before_jun_24 & even_year_or_oct_onwards]
All_Dates = All_Dates[All_Dates.year > 1998]

# Seed every RNG in use so that runs are repeatable.
seed = 42
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

# Single device definition (the original cell defined it twice, identically).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

days  = 90





In [3]:
# Alias: the rest of the notebook refers to the clustering static table as `static_variables`.
static_variables = clustering_static_variables

# Tuning individual basins

In [40]:
# Input/output sizes for the basin-specific models.
LR = 1e-3

# Number of columns in the static table and in one seasonal-forecast frame.
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]

# Optional extra forcing blocks; all switched off (zero columns) here.
History_Fourier_in_forcings = 0     # 2*3*(6 - 1) when enabled
Climate_guess = 0                   # 3 when enabled; this is about climatology, not climate indices
History_Statistics_in_forcings = 0  # 5*2 when enabled

# Forecast-branch width: all blocks above plus 3 further columns
# (NOTE(review): the origin of the constant 3 is not visible in this notebook).
forecast_input_size = sum([
    forecast_size,
    static_size,
    History_Fourier_in_forcings,
    History_Statistics_in_forcings,
    Climate_guess,
    3,
])

output_size = 3
head_hidden_size = 64
head_num_layers = 3

hindcast_input_size = 22  # 22 with flow



In [41]:
# Collect the basins whose stored best validation loss has NOT reached the
# -0.05 threshold. The list is built in `basins` order so the result is
# reproducible; the previous set-difference returned an arbitrary order and
# started out as an alias of `basins`.
Retrain_Basins = []
for basin in basins:
    loss_path = f'/data/Hydra_Work/Tuning/Week_Ahead_Models_V2/Specific_Week_Ahead_Models/{basin}_specific_loss.txt'

    # The loss file holds a single float.
    with open(loss_path, 'r') as file:
        Overall_Best_Val_Loss = float(file.read())

    # `not (x < t)` keeps the original semantics for NaN (basin stays in the list).
    if not (Overall_Best_Val_Loss < -0.05):
        Retrain_Basins.append(basin)

In [42]:

def define_models(hindcast_input_size, forecast_input_size, hidden_size, num_layers, dropout, bidirectional, learning_rate, copies = 3, forecast_output_size = 3, device = device):
    """Create `copies` independent Google_Model_Block models with their training objects.

    This definition rebinds the `define_models` name imported from
    Full_LSTM_ML_Functions at the top of the notebook. The `device` default is
    whatever the module-level `device` was when this cell was executed.

    Returns:
        Four dicts keyed by copy index: models, their parameter lists,
        Adam optimizers (weight decay 1e-5) and StepLR schedulers
        (step_size 5000, gamma 0.95).
    """
    models, params_to_optimize, optimizers, schedulers = {}, {}, {}, {}

    # The hindcast branch emits as many values as the forecast branch.
    hindcast_output_size = forecast_output_size

    for index in range(copies):
        network = Google_Model_Block(hindcast_input_size, forecast_input_size, hindcast_output_size, forecast_output_size, hidden_size, num_layers, device, dropout, bidirectional)
        network.to(device)

        trainable = list(network.parameters())
        # Probably should be doing 1e-2 and 10
        adam = torch.optim.Adam(trainable, lr=learning_rate, weight_decay=1e-5)
        # Alternative tried previously: CosineAnnealingLR(adam, T_max = 100000)
        step_decay = lr_scheduler.StepLR(adam, step_size=5000, gamma=0.95)

        models[index] = network
        params_to_optimize[index] = trainable
        optimizers[index] = adam
        schedulers[index] = step_decay

    return models, params_to_optimize, optimizers, schedulers


In [43]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper
from ray.tune.search.optuna import OptunaSearch
import optuna

# Fixed parameters
# These module-level names are read as globals inside `train_model`.
total_epochs = 50 # Will be higher in reality, this is just to make the plots
n_epochs = 1  # Epochs between tests
group_lengths = [1] #np.arange(180) 1 Day ahead for streamlined version
batch_size = 256
copies = 1

# parameters to tune
# Each list currently holds a single value, so the grid below effectively
# sweeps only over basin and test year.
hidden_sizes = [64] # 64 converged upon
num_layers =  [1]
dropout = [0]
bidirectional = [False] #[True, False]
learning_rate = [1e-2] #[1e-3, 1e-5]


# Set up configuration space
# One trial is created per combination of the grid_search values.
config_space = {

    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate),
    "basin":  tune.grid_search(basins), #basins
    # Held-out test year: every second year from 2000 to 2022.
    'test_year': tune.grid_search(list(np.arange(2000,2024,2)) )

}




In [44]:
def train_model(config):
    """Train one basin-specific model for a single (basin, test_year) trial.

    Reads the shared inputs from the Ray object store, trains for
    `total_epochs` epochs, reports the running-best relative validation loss to
    Ray after every epoch and checkpoints the model whenever that best improves.

    Args:
        config: Ray Tune trial config with keys "basin", "test_year",
            "hidden_size", "num_layers", "dropout", "bidirectional" and
            "learning_rate".

    Returns:
        The best (lowest) relative validation loss seen during this run.

    Relies on module-level globals: the `*_id` object references,
    `hindcast_input_size`, `forecast_input_size`, `total_epochs`, `n_epochs`,
    `batch_size`, `group_lengths` and `criterion`.
    """
    All_Dates = ray.get(All_Dates_id)

    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    # The two years preceding the test year are used for validation. Negative
    # indices wrap around, so test year 2000 validates on 2022 and 2020.
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]]
    train_years = [year for year in years if year not in [test_year] + val_years]

    # The test year itself is held out entirely; it is used by neither split.
    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]

    era5 = ray.get(era5_id)
    daily_flow = ray.get(daily_flow_id)
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    val_loss = 1000

    basin = config["basin"]

    save_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/Specific_LSTM_Model/{basin}_specific.pth'
    loss_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/Specific_LSTM_Model/{basin}_specific_loss.txt'

    # Make sure the per-year output folder exists before anything is written.
    os.makedirs(os.path.dirname(loss_path), exist_ok=True)

    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)

    copies = 1
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)

    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

            # Need to fix the outputs of No_Body_Model_Run
            # Each call returns (loss, climate loss); the second value is
            # presumably the climatology baseline on the same dates. The two
            # baselines are kept apart so each loss uses its own.
            train_losses[copy], train_climate_loss = No_Body_Model_Run(Train_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=True)
            epoch_val_losses[copy], val_climate_loss = No_Body_Model_Run(Val_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=True)

        loss = np.mean(list(train_losses.values())) - train_climate_loss

        # Validation loss relative to the climate loss (negative = better than it).
        candidate_val_loss = ((np.mean( list(epoch_val_losses.values()) ).mean() - val_climate_loss)[0])/np.mean(val_climate_loss)

        # Only an epoch that matches or beats this run's best may touch the
        # checkpoint. Previously the '%f'-rounded value in the loss file could
        # make a later, worse epoch look like an improvement and overwrite the
        # best model. A NaN candidate never compares as an improvement.
        if candidate_val_loss <= val_loss:
            val_loss = candidate_val_loss
            # NOTE(review): as before, the checkpoint tracks the best epoch of
            # THIS run even if the loss file records a better earlier run.
            torch.save(models[0], save_path)

            # Check best loss so far for this model
            with open(loss_path, 'r') as file:
                Overall_Best_Val_Loss = float(file.read())

            if val_loss < Overall_Best_Val_Loss:
                with open(loss_path, 'w') as f:
                    f.write('%f' % val_loss)

        ray.train.report({'val_loss' : val_loss})
        losses.append(loss)
        val_losses.append(val_loss)

    return val_loss

    


In [45]:
from ray import train, tune


# Restart Ray so this cell can be re-run safely, and extend PYTHONPATH for the workers.
# NOTE(review): only Competition_Functions is added here, while the notebook also
# imports from Pipeline_Functions and Post_Rodeo_Work - confirm workers can import those.
ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )

# Put the large shared inputs in the object store once; the training function
# fetches them with `ray.get` through these module-level references.
All_Dates_id = ray.put(All_Dates)
era5_id = ray.put(era5)
daily_flow_id = ray.put(daily_flow)
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(static_variables)


2024-06-24 15:21:07,814	INFO worker.py:1724 -- Started a local Ray instance.


In [46]:
# Trial scheduler, keyed on the minimised `val_loss` metric.
# NOTE(review): max_t=100 is larger than the total_epochs = 50 configured for
# the basin-specific runs - confirm this is intended.
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='val_loss',
    mode='min',
    max_t=100,
    grace_period=20,
    reduction_factor=2,
    brackets=1,
)


# Plateau-based stopping rule on `val_loss`.
# NOTE(review): grace_period=70 exceeds the 50 reports a trial produces with
# total_epochs = 50, so this stopper presumably never fires - verify.
plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 20,
    grace_period=70,
    mode="min",
)


In [47]:


# Number of trials meant to share the machine; used to split CPU/GPU below.
runs = 12
# At weekly:
# Animas has {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.1, 'bidirectional': False, 'learning_rate': 1e-05}, 64,3,0.1. Results for 64, 1, 0.1, True identical
def objective(config):
    """Ray Tune trainable: run `train_model` and return its best validation loss."""
    score = train_model(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}

#basin = 'stehekin_r_at_stehekin'

#, search_alg = optuna_search
optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
tune_config = tune.TuneConfig(scheduler=asha_scheduler)
running_tune_config = tune.TuneConfig()

run_config=train.RunConfig(stop= plateau_stopper)

# Note using < 1gb per run stops pylance from crashing I think
# Without Optun
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/runs, "gpu": 1/(runs)}), param_space=config_space, tune_config = tune_config, run_config = run_config)
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config)

results = tuner.fit()
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)



# Define the file path where you want to save the best configuration.
# Name the file after the basin of the winning trial. The module-level `basin`
# is just a leftover loop variable from an earlier cell and says nothing about
# which trial won.
best_basin = best_config['basin']
file_path = f"/data/Hydra_Work/Tuning/Config_Text/{best_basin}_best_config.txt"
# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)


0,1
Current time:,2024-06-24 15:26:08
Running for:,00:04:57.99
Memory:,110.1/125.9 GiB

Trial name,status,loc,basin,bidirectional,dropout,hidden_size,learning_rate,num_layers,test_year,iter,total time (s),val_loss
objective_62db0_00000,RUNNING,136.156.133.98:2416447,libby_reservoir_6f10,False,0,64,0.01,1,2000,8.0,265.341,-0.847538
objective_62db0_00001,RUNNING,136.156.133.98:2416454,owyhee_r_bl_owy_09e0,False,0,64,0.01,1,2000,8.0,269.039,-0.659247
objective_62db0_00002,RUNNING,136.156.133.98:2416455,san_joaquin_riv_08d0,False,0,64,0.01,1,2000,8.0,266.739,-0.71921
objective_62db0_00003,RUNNING,136.156.133.98:2416523,taylor_park_res_1610,False,0,64,0.01,1,2000,8.0,268.684,-0.8882
objective_62db0_00004,RUNNING,136.156.133.98:2416526,boise_r_nr_boise,False,0,64,0.01,1,2000,8.0,261.894,-0.838796
objective_62db0_00005,RUNNING,136.156.133.98:2416527,green_r_bl_howa_9930,False,0,64,0.01,1,2000,8.0,267.129,-0.565892
objective_62db0_00006,RUNNING,136.156.133.98:2416534,weber_r_nr_oakley,False,0,64,0.01,1,2000,10.0,268.911,-0.860733
objective_62db0_00007,RUNNING,136.156.133.98:2416541,detroit_lake_inflow,False,0,64,0.01,1,2000,8.0,263.934,-0.747522
objective_62db0_00008,RUNNING,136.156.133.98:2416550,virgin_r_at_virtin,False,0,64,0.01,1,2000,10.0,273.957,-0.741776
objective_62db0_00009,RUNNING,136.156.133.98:2416552,dillon_reservoi_69c0,False,0,64,0.01,1,2000,8.0,262.391,-0.842688


In [None]:
# Show basin and test year for the trials whose final val_loss is below -0.1.
results_df = results.get_dataframe()
results_df[results_df['val_loss'] < -0.1][['val_loss', 'config/basin', 'config/test_year']]

In [None]:
# A basin counts as "safe" as soon as any of its trials has val_loss < -0.05.
Safe_Basins = list(results_df[results_df['val_loss'] < -0.05]['config/basin'].values)
# Keep the remaining basins in their original `basins` order; a set difference
# would return them in an arbitrary order that changes between sessions.
Retrain_Basins = [name for name in basins if name not in Safe_Basins]
Retrain_Basins

In [None]:
from scipy import stats

results_df = results.get_dataframe()
columns_to_drop = ['timestamp', 'checkpoint_dir_name', 'done', 'training_iteration',
                   'trial_id', 'date', 'time_this_iter_s', 'time_total_s', 'pid',
                   'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore']

# Drop the bookkeeping columns. A new frame is returned (no in-place mutation)
# and columns missing from this results frame are tolerated.
results_df = results_df.drop(columns=columns_to_drop, errors="ignore")

# The two groups are split on num_layers (3 vs 1), not on `bidirectional`.
val_loss_num_layers_3 = results_df[results_df['config/num_layers'] == 3]['val_loss']
val_loss_num_layers_1 = results_df[results_df['config/num_layers'] == 1]['val_loss']
# Old, misleading names kept as aliases in case other cells still use them.
val_loss_bidirectional_true = val_loss_num_layers_3
val_loss_bidirectional_false = val_loss_num_layers_1

alpha = 0.05  # Significance level
t_statistic, p_value = float('nan'), float('nan')

if val_loss_num_layers_3.empty or val_loss_num_layers_1.empty:
    # With an empty group the t-test yields NaN, which the comparison below
    # would misreport as "not statistically significant".
    print("Cannot run the t-test: at least one num_layers group has no trials.")
else:
    # Perform a t-test
    t_statistic, p_value = stats.ttest_ind(val_loss_num_layers_3, val_loss_num_layers_1)

    # Print the results
    print("T-Statistic:", t_statistic)
    print("P-Value:", p_value)

    # Check if the difference in means is statistically significant
    if p_value < alpha:
        print("The difference in mean val_loss is statistically significant.")
    else:
        print("The difference in mean val_loss is not statistically significant.")

In [None]:
# Loading models
# One saved model per basin. The original f-string had no placeholder, so every
# basin loaded the same literal file "basin.pth".
# NOTE(review): confirm the on-disk naming is "<basin>.pth"; the training cells
# in this notebook save to "<basin>_specific.pth" under a different folder.
Tuned_Models = {}
for basin in basins:
    Tuned_Models[basin] = torch.load(f'/data/Hydra_Work/Post_Rodeo_Work/Tuned_Single_Models/{basin}.pth')


# Tuning General Model

In [None]:
# Input/output sizes for the general (all-basin) model.
LR = 1e-3

# Number of columns in the static table and in one seasonal-forecast frame.
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]

# Optional extra forcing blocks; all switched off (zero columns) here.
History_Fourier_in_forcings = 0     # 2*3*(6 - 1) when enabled
Climate_guess = 0                   # 3 when enabled
History_Statistics_in_forcings = 0  # 5*2 when enabled

# Forecast-branch width: all blocks above plus 3 further columns
# (NOTE(review): the origin of the constant 3 is not visible in this notebook).
forecast_input_size = sum([
    forecast_size,
    static_size,
    History_Fourier_in_forcings,
    History_Statistics_in_forcings,
    Climate_guess,
    3,
])

output_size = 3
head_hidden_size = 64
head_num_layers = 3

# One column fewer than the basin-specific setup (22 "with flow").
hindcast_input_size = 21


In [None]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters
# These module-level names are read as globals inside `train_model_general`.
total_epochs = 200
n_epochs = 1 # Epochs between tests
group_lengths = [1] # 
# NOTE(review): this scalar is overwritten by the list `batch_size = [128]`
# further down, so after this cell the global `batch_size` is a list.
# `train_model_general` takes its batch size from the trial config, but
# `train_model` reads the global directly - re-running it after this cell
# would pass a list.
batch_size = 64
copies = 1

# parameters to tune
# I tuned to 128,2,0.4,False,1e-3 
hidden_sizes = [64]
num_layers = [2]
dropout = [0.2] # 0.2 0.4 both equal
bidirectional =  [False]
learning_rate = [1e-3]
batch_size = [128] 

# One trial per combination; with single-valued lists only test_year varies.
config_space = {
    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate),
    "batch_size": tune.grid_search(batch_size),
    'test_year': tune.grid_search(list(np.arange(2000,2024,2)) )
    
}


# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/'

In [None]:
def train_model_general(config):
    """Train the general (all-basin) model for one test-year trial.

    Reads the shared inputs from the Ray object store, trains on all `basins`
    for `total_epochs` epochs, reports the running-best relative validation
    loss to Ray after every epoch and checkpoints the model when an epoch beats
    the best loss recorded on disk.

    Args:
        config: Ray Tune trial config with keys "test_year", "hidden_size",
            "num_layers", "dropout", "bidirectional", "learning_rate" and
            "batch_size".

    Returns:
        The best (lowest) relative validation loss seen during this run.

    Relies on module-level globals: the `*_id` object references, `basins`,
    `hindcast_input_size`, `forecast_input_size`, `total_epochs`, `n_epochs`,
    `group_lengths` and `criterion`.
    """
    All_Dates = ray.get(All_Dates_id)

    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    # The two years preceding the test year are used for validation. Negative
    # indices wrap around, so test year 2000 validates on 2022 and 2020.
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]]
    train_years = [year for year in years if year not in [test_year] + val_years]

    # The test year itself is held out entirely; it is used by neither split.
    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]

    era5 = ray.get(era5_id)
    daily_flow = ray.get(daily_flow_id)
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    copies = 1

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    save_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/General_LSTM_No_Flow_Model/General_LSTM.pth'
    loss_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/General_LSTM_No_Flow_Model/General_LSTM_loss.txt'

    val_loss = 1000

    # Make sure the per-year output folder exists before anything is written.
    os.makedirs(os.path.dirname(loss_path), exist_ok=True)

    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)

    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)

    batch_size = config["batch_size"]
    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

            # Need to fix the outputs of No_Body_Model_Run
            # Each call returns (loss, climate loss); the second value is
            # presumably the climatology baseline on the same dates. The two
            # baselines are kept apart so each loss uses its own.
            train_losses[copy], train_climate_loss = No_Body_Model_Run(Train_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy], val_climate_loss = No_Body_Model_Run(Val_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values())) - train_climate_loss

        # Validation loss relative to the climate loss (negative = better than it).
        candidate_val_loss = ((np.mean(list(epoch_val_losses.values())).mean() - val_climate_loss)[0])/np.mean(val_climate_loss)

        # Only an epoch that improves on this run's best may touch the
        # checkpoint. Previously the '%f'-rounded value in the loss file could
        # make a later, worse epoch look like an improvement and overwrite the
        # best model. A NaN candidate never compares as an improvement.
        if candidate_val_loss < val_loss:
            val_loss = candidate_val_loss

            # Check best loss so far for this model
            with open(loss_path, 'r') as file:
                Overall_Best_Val_Loss = float(file.read())

            if val_loss < Overall_Best_Val_Loss:
                torch.save(models[0], save_path)

                with open(loss_path, 'w') as f:
                    f.write('%f' % val_loss)

        ray.train.report({'val_loss' : val_loss})

        losses.append(loss)
        val_losses.append(candidate_val_loss)

    return val_loss


In [None]:
from ray import train, tune
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper
from ray.tune.search.optuna import OptunaSearch
import optuna


# Restart Ray so this cell can be re-run safely, and extend PYTHONPATH for the workers.
# NOTE(review): only Competition_Functions is added here, while the notebook also
# imports from Pipeline_Functions and Post_Rodeo_Work - confirm workers can import those.
ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )

# Put the large shared inputs in the object store once; the training function
# fetches them with `ray.get` through these module-level references.
All_Dates_id = ray.put(All_Dates)
era5_id = ray.put(era5)
daily_flow_id = ray.put(daily_flow)
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(static_variables)

In [None]:
# The ASHA scheduler is disabled for the general-model runs (kept for reference).
# asha_scheduler = ASHAScheduler(
#     time_attr='training_iteration',
#     metric='val_loss',
#     mode='min',
#     max_t=100,
#     grace_period=20,
#     reduction_factor=2,
#     brackets=1,
# )


# Plateau-based stopping rule on `val_loss`.
# NOTE(review): grace_period=200 equals the total_epochs = 200 configured for
# the general model, so the stopper presumably cannot end a trial early - verify.
plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 20,
    grace_period=200,
    mode="min",
)


In [None]:
# {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.1, 'bidirectional': True, 'learning_rate': 0.001}
# 7 Days:  128	2	0.4	False	0.001
def objective(config):
    """Ray Tune trainable: run `train_model_general` and return its best validation loss.

    This rebinds the name `objective` used earlier for the basin-specific runs.
    """
    score = train_model_general(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


#, search_alg = optuna_search
# optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
# tune_config = tune.TuneConfig(scheduler=asha_scheduler)
run_config=train.RunConfig(stop= plateau_stopper)

# Number of trials meant to share the machine; used to split CPU/GPU below.
runs = 12
# Without Optuna
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/runs , "gpu": 1/runs }), param_space=config_space, run_config = run_config)
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config)

results = tuner.fit()
# try get_best_checkpoint, or change val to be maximum of current val_loss and previous ones
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)
# Plain string: the path has no placeholders, so no f-string is needed.
file_path = "/data/Hydra_Work/Tuning/Config_Text/General_Model_best_config.txt"

# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)


In [None]:
# Show the general-model trials whose final val_loss is below -0.15.
results_df = results.get_dataframe()
results_df[results_df['val_loss'] < -0.15] 

In [None]:
# Load a previously saved general model.
# NOTE(review): this path differs from the `save_path` written by
# `train_model_general` (under 3_Day_No_Forecast_Validation_Models/<test_year>/) -
# confirm this is the intended checkpoint.
General_Model = torch.load('/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/General_model.pth')



# Tuning Hydra Model

In [19]:
def define_models_hydra(body_hindcast_input_size, body_forecast_input_size, body_output_size, body_hidden_size, body_num_layers, body_dropout,
                        head_hidden_size, head_num_layers, head_forecast_output_size, head_dropout, bidirectional, basins,
                        learning_rate_general_head, learning_rate_head, learning_rate_body, LR = 1e-3, 
                        additional_specific_head_hindcast_input_size = 1, additional_specific_head_forecast_input_size = 0,
                        copies=1, device=None):
    """Build the Hydra model parts (shared body, general head, per-basin heads) and their optimizers.

    For each of `copies` copies this creates a body and a general head
    (both `Google_Model_Block`), a `Specific_Heads` collection for `basins`,
    three Adam optimizers and two StepLR schedulers (step 1, gamma 0.98).

    Returns:
        Eight dicts keyed by copy index, in this order: bodies, general heads,
        basin heads, joint optimizers, joint schedulers, general optimizers
        (general head + body), specific optimizers (basin heads only) and
        specific schedulers.
    """
    # Result containers, one entry per copy.
    Hydra_Bodys, General_Heads, Basin_Heads = {}, {}, {}
    optimizers, schedulers = {}, {}
    general_optimizers, specific_optimizers, specific_schedulers = {}, {}, {}

    # The body's hindcast and forecast branches share one output width.
    body_hindcast_output_size = body_output_size
    body_forecast_output_size = body_output_size

    # Heads take the body's outputs as their inputs.
    head_hindcast_input_size = body_hindcast_output_size
    head_forecast_input_size = body_forecast_output_size
    # Define head hindcast size as head-forecast for simplicity.
    head_hindcast_output_size = head_forecast_output_size

    # Basin-specific heads mirror the general head, plus optional extra inputs.
    specific_in_hindcast = head_hindcast_input_size + additional_specific_head_hindcast_input_size
    specific_in_forecast = head_forecast_input_size + additional_specific_head_forecast_input_size
    specific_out_hindcast = head_forecast_output_size
    specific_out_forecast = head_forecast_output_size

    for index in range(copies):
        body = Google_Model_Block(body_hindcast_input_size, body_forecast_input_size, body_hindcast_output_size, body_forecast_output_size, body_hidden_size, body_num_layers, device, body_dropout, bidirectional)
        general_head = Google_Model_Block(head_hindcast_input_size, head_forecast_input_size, head_hindcast_output_size, head_forecast_output_size, head_hidden_size, head_num_layers, device, head_dropout, bidirectional)
        basin_heads = Specific_Heads(basins, specific_in_hindcast, specific_in_forecast, specific_out_hindcast, specific_out_forecast, head_hidden_size, head_num_layers, device, head_dropout, bidirectional)

        Hydra_Bodys[index] = body
        General_Heads[index] = general_head
        Basin_Heads[index] = basin_heads

        # Flatten the parameters of every basin head into a single list.
        specific_head_parameters = [
            parameter
            for _, head in basin_heads.items()
            for parameter in head.parameters()
        ]

        # Joint optimizer with one learning rate per component.
        # The extra `lr` is the global default, not really important.
        joint_groups = [
            {"params": general_head.parameters(), "lr": learning_rate_general_head},
            {"params": specific_head_parameters, "lr": learning_rate_head},
            {"params": body.parameters(), "lr": learning_rate_body},
        ]
        optimizers[index] = torch.optim.Adam(joint_groups, lr=LR, weight_decay=1e-5)  # 1e-4 good so far, 3 not so good

        # General head and body only.
        shared_groups = [
            {"params": general_head.parameters(), "lr": learning_rate_general_head},
            {"params": body.parameters(), "lr": learning_rate_body},
        ]
        general_optimizers[index] = torch.optim.Adam(shared_groups, lr=LR)

        # Basin heads only.
        specific_optimizers[index] = torch.optim.Adam(
            [{"params": specific_head_parameters, "lr": learning_rate_head}]
        )

        # Alternative tried previously: CosineAnnealingLR(T_max= 100000, eta_min= 1e-4)
        schedulers[index] = lr_scheduler.StepLR(optimizers[index], 1, gamma=0.98)
        specific_schedulers[index] = lr_scheduler.StepLR(specific_optimizers[index], 1, gamma=0.98)

    return Hydra_Bodys, General_Heads, Basin_Heads, optimizers, schedulers, general_optimizers, specific_optimizers, specific_schedulers

In [20]:
# Input/output sizes for the Hydra model.
LR = 1e-3

# Number of columns in the static table and in one seasonal-forecast frame.
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]

# Optional extra forcing blocks; all switched off (zero columns) here.
History_Fourier_in_forcings = 0     # 2*3*(6 - 1) when enabled
Climate_guess = 0                   # 3 when enabled
History_Statistics_in_forcings = 0  # 5*2 when enabled

# Forecast-branch width: all blocks above plus 3 further columns
# (NOTE(review): the origin of the constant 3 is not visible in this notebook).
forecast_input_size = sum([
    forecast_size,
    static_size,
    History_Fourier_in_forcings,
    History_Statistics_in_forcings,
    Climate_guess,
    3,
])

output_size = 3
head_hidden_size = 64
head_num_layers = 3

# The body receives the hindcast and forecast inputs.
body_hindcast_input_size = 21
body_forecast_input_size = forecast_input_size


# Sentinel "no best loss yet" value.
Overall_Best_Val_Loss = 999

In [27]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters
total_epochs = 200
n_epochs = 1 # Epochs between tests
group_lengths = [1] #np.arange(180)

copies = 1
head_output_size = 3

# Body search values (single-valued lists, so nothing is actually swept).
body_hidden_sizes =  [64]
body_num_layers = [1]
body_dropouts = [0] 
body_learning_rates = [1e-3] 
body_outputs = [16]

# Head search values.
# NOTE(review): `head_num_layers` and `batch_size` are rebound to lists here;
# earlier cells define them as scalars, so cells that read the globals will
# see lists after this cell has run.
head_hidden_sizes = [32]
head_num_layers = [1]
head_dropouts = [0.0]
head_learning_rates = [1e-2] #1e-2
batch_size = [128]
LR = 1e-3
bidirectionals = [False]
spec_multiplier = [1]

warmup = [0]

# One trial per combination; with single-valued lists only test_year varies.
config_space = {
    "body_hidden_size": tune.grid_search(body_hidden_sizes),
    "body_num_layer": tune.grid_search(body_num_layers),
    "body_dropout": tune.grid_search(body_dropouts),
    "bidirectional": tune.grid_search(bidirectionals),
    "body_output": tune.grid_search(body_outputs),
    "body_learning_rate": tune.grid_search(body_learning_rates),
    "head_hidden_size": tune.grid_search(head_hidden_sizes),
    "head_num_layer": tune.grid_search(head_num_layers),
    "head_dropout": tune.grid_search(head_dropouts),
    "head_learning_rate": tune.grid_search(head_learning_rates),
    "spec_multiplier": tune.grid_search(spec_multiplier),
    'batch_size': tune.grid_search(batch_size),
    'test_year': tune.grid_search(list(np.arange(2000,2024,2))), #
    'warmup': tune.grid_search(warmup),
}

# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_Hydra_Model/'



In [28]:
def train_model_hydra(config):
    """Train the Hydra model (shared body, general head, per-basin heads) for one fold.

    Runs as a Ray Tune trainable body: datasets are fetched from the Ray object store,
    the model is trained for `total_epochs` epochs and, after every epoch, the best
    validation score seen so far in this trial is reported as `val_loss`.

    The fold is selected by `config['test_year']`. The two entries that precede the test
    year in the year list are used for validation (negative indexing makes this wrap
    around to the end of the list for the first two folds); the remaining years are used
    for training. The test year itself is held out and not touched here.

    The score is the validation loss relative to climatology,
    `(val_loss - climate_loss) / climate_loss`, so lower is better and negative values
    beat climatology.

    Side effects: whenever an epoch improves on the value stored in the fold's loss file,
    the body, the general head and every basin head are saved under
    `/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/`. The loss file
    is only initialised when missing, so the comparison is against the best score ever
    written for that fold, not only against this trial.

    Args:
        config: Ray Tune trial configuration. Keys read here: `test_year`, `warmup`,
            `batch_size`, `body_output`, `body_hidden_size`, `body_num_layer`,
            `body_dropout`, `body_learning_rate`, `head_hidden_size`, `head_num_layer`,
            `head_dropout`, `head_learning_rate`, `bidirectional`.

    Returns:
        The lowest relative validation loss reached during this trial.
    """
    All_Dates = ray.get(All_Dates_id)

    # --- Fold definition --------------------------------------------------------------
    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    test_index = years.index(test_year)
    val_years = [years[test_index - 1], years[test_index - 2]]
    train_years = [year for year in years if year not in [test_year] + val_years]

    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]

    # --- Shared datasets from the Ray object store -------------------------------------
    era5 = ray.get(era5_id)
    daily_flow = ray.get(daily_flow_id)
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    # --- Output locations ---------------------------------------------------------------
    body_save_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_Body_LSTM.pth'
    head_save_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_Head_LSTM.pth'
    basin_heads_save_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/Basin_Head_Model'

    loss_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_LSTM_loss.txt'

    # Make sure the fold's folders exist so neither the loss file nor the checkpoints
    # fail with FileNotFoundError part-way through a long run.
    os.makedirs(os.path.dirname(loss_path), exist_ok=True)
    os.makedirs(basin_heads_save_path, exist_ok=True)

    val_loss = 1000

    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)

    copies = 1
    warmup = config['warmup']
    best_val_loss = 100
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The body learning rate is also what gets passed as the general-head learning rate.
    general_head_learning_rate = config['body_learning_rate']
    Hydra_Bodys, General_Hydra_Heads, model_heads, optimizers, schedulers, general_optimizers, specific_optimizers, specific_schedulers  = define_models_hydra(body_hindcast_input_size, body_forecast_input_size, config['body_output'],
                                config['body_hidden_size'], config['body_num_layer'], config['body_dropout'], 
                                config['head_hidden_size'], config['head_num_layer'], 3, config['head_dropout'], config['bidirectional'], basins,
                                general_head_learning_rate, config['head_learning_rate'], config['body_learning_rate'], LR, device = device
                                )

    batch_size = config['batch_size']

    general_losses, specific_losses, general_val_losses, specific_val_losses, val_losses = [], [], [], [], []

    # Initialise, with dummy scheduler: `warmup` epochs using only the general optimizer.
    for copy in range(copies):
        dummy_scheduler = lr_scheduler.StepLR(general_optimizers[copy],step_size = warmup, gamma = 0.8)

        Model_Run(Train_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
            Static_variables, general_optimizers[copy], dummy_scheduler, criterion, early_stopper= None, n_epochs= warmup,
            batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, feed_forcing = False)

    # Would be great if I could initialise the specific heads with the general heads, or finetune the specific heads at the end

    for epoch in range(total_epochs):
        train_general_losses = {}
        train_specific_losses = {}
        train_climate_losses = {}
        epoch_val_general_losses = {}
        epoch_val_specific_losses = {}
        climate_losses = {}

        for copy in range(copies):

            # Full Training
            train_general_losses[copy], train_specific_losses[copy], train_climate_losses[copy] = Model_Run(Train_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs= n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, feed_forcing = False)

            # Just tuning specific heads (overwrites the training losses recorded above).
            # NOTE(review): this pass steps `schedulers[copy]` although it optimises with
            # `specific_optimizers[copy]`; `specific_schedulers` is returned by
            # define_models_hydra but never used - confirm which scheduler is intended.
            train_general_losses[copy], train_specific_losses[copy], train_climate_losses[copy] = Model_Run(Train_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, specific_optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs= n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, feed_forcing = False)

            # Validation pass (Train_Mode=False) on the two validation years.
            epoch_val_general_losses[copy], epoch_val_specific_losses[copy], climate_losses[copy] = Model_Run(Val_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs= n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, feed_forcing = False)

        general_loss = np.mean(list(train_general_losses.values()))
        specific_loss = np.mean(list(train_specific_losses.values()))
        train_climate_loss = np.mean(list(train_climate_losses.values()))
        climate_loss = np.mean(list(climate_losses.values()))

        epoch_val_general_loss = np.mean(list(epoch_val_general_losses.values())).mean()
        epoch_val_specific_loss = np.mean(list(epoch_val_specific_losses.values())).mean()

        general_losses.append(general_loss)
        specific_losses.append(specific_loss)
        specific_val_losses.append(epoch_val_specific_loss)
        general_val_losses.append(epoch_val_general_loss)

        # Average of general-head and basin-head validation loss, relative to climatology.
        val_loss = 0.5*(epoch_val_general_loss + epoch_val_specific_loss)
        candidate_val_loss = ((val_loss.mean() - climate_loss))/np.mean(climate_loss)
        best_val_loss = np.min([best_val_loss, candidate_val_loss ])

        with open(loss_path, 'r') as file:
            # Read the entire contents of the file
            Overall_Best_Val_Loss = float(file.read())

        # Checkpoint only when THIS epoch's weights beat the stored best. Comparing the
        # running minimum against a value stored with 6 decimals ('%f') could stay true on
        # every later epoch (whenever rounding pushed the stored number above the true
        # best) and silently overwrite the best checkpoint with worse, later weights.
        if candidate_val_loss < Overall_Best_Val_Loss:
            with open(loss_path, 'w') as f:
                # Full precision so the value read back equals the value compared.
                f.write(repr(float(candidate_val_loss)))

            torch.save(Hydra_Bodys[0], body_save_path)
            torch.save(General_Hydra_Heads[0], head_save_path)
            for basin in basins:
                torch.save(model_heads[0][basin], f"{basin_heads_save_path}/{basin}.path")

        # The tuner sees the best-so-far score, not the per-epoch score.
        ray.train.report({'val_loss' : best_val_loss})
        #print('Validation Loss', val_losses)
        #print('Training Loss', general_losses/train_climate_loss )
        #print(candidate_val_loss)
        val_losses.append(candidate_val_loss)

    return best_val_loss



In [29]:
import ray
from ray import train, tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper


# Start from a clean Ray instance whose workers can import the competition helper modules.
ray.shutdown()
ray.init(runtime_env={"env_vars": {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/'}})

# Publish the shared datasets to the object store once; the training function fetches
# them through these handles with ray.get(...).
(
    All_Dates_id,
    era5_id,
    daily_flow_id,
    climatological_flows_id,
    climate_indices_id,
    seasonal_forecasts_id,
    Static_variables_id,
) = map(
    ray.put,
    (
        All_Dates,
        era5,
        daily_flow,
        climatological_flows,
        climate_indices,
        seasonal_forecasts,
        static_variables,
    ),
)


2024-06-24 11:05:43,227	INFO worker.py:1724 -- Started a local Ray instance.


In [30]:
# ASHA early-termination scheduler on the reported `val_loss` (lower is better).
# Note: the Tuner built in the following cell is given no tune_config, so this scheduler
# is defined here but not applied to that run.
asha_scheduler = ASHAScheduler(
    metric='val_loss',
    mode='min',
    time_attr='training_iteration',
    grace_period=20,
    max_t=100,
    reduction_factor=3,
    brackets=1,
)

# Per-trial plateau stopper on `val_loss`: looks at the last 100 reported results and
# never stops a trial before 20 iterations.
plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    mode="min",
    num_results=100,
    grace_period=20,
)


In [32]:
# Number of trials meant to share the machine at once (used to split CPU/GPU per trial).
runs_per_iteration = 12
def objective(config):
    """Ray Tune trainable for the Hydra model.

    Delegates to `train_model_hydra`, which reports `val_loss` after every epoch and
    selects the device itself, then hands the final best score back to Tune.

    Args:
        config: trial configuration sampled from `config_space`.

    Returns:
        dict with the single key `val_loss` (best relative validation loss of the trial).
    """
    score = train_model_hydra(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


file_path = "/data/Hydra_Work/Tuning/Config_Text/Hydral_Model_best_config.txt"
# Create the output folder up front: a missing directory should fail here, not after the
# multi-hour tuner.fit() call below has finished.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

run_config=train.RunConfig(stop= plateau_stopper)
# Can use fractions of GPU: each trial requests 15/runs_per_iteration CPUs and
# 1/runs_per_iteration of a GPU.
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/runs_per_iteration, "gpu": 1/(runs_per_iteration)}), param_space=config_space, run_config = run_config) 

results = tuner.fit()
# NOTE(review): `test_year` is part of the grid, so with all other hyperparameters fixed
# the "best" config mainly identifies the fold with the lowest validation loss.
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)

# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)

0,1
Current time:,2024-06-24 14:21:40
Running for:,03:15:52.74
Memory:,67.0/125.9 GiB

Trial name,status,loc,batch_size,bidirectional,body_dropout,body_hidden_size,body_learning_rate,body_num_layer,body_output,head_dropout,head_hidden_size,head_learning_rate,head_num_layer,spec_multiplier,test_year,warmup,iter,total time (s),val_loss
objective_b5c17_00000,TERMINATED,136.156.133.98:2402050,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2000,0,201,11062.2,-0.81489
objective_b5c17_00001,TERMINATED,136.156.133.98:2402055,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2002,0,195,10256.3,-0.8162
objective_b5c17_00002,TERMINATED,136.156.133.98:2402056,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2004,0,201,11416.7,-0.821339
objective_b5c17_00003,TERMINATED,136.156.133.98:2402073,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2006,0,154,9773.96,-0.800173
objective_b5c17_00004,TERMINATED,136.156.133.98:2402076,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2008,0,194,11495.4,-0.80194
objective_b5c17_00005,TERMINATED,136.156.133.98:2402077,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2010,0,162,10278.6,-0.801994
objective_b5c17_00006,TERMINATED,136.156.133.98:2402082,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2012,0,201,11414.3,-0.808971
objective_b5c17_00007,TERMINATED,136.156.133.98:2402087,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2014,0,198,11745.4,-0.80699
objective_b5c17_00008,TERMINATED,136.156.133.98:2402088,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2016,0,184,11318.5,-0.821041
objective_b5c17_00009,TERMINATED,136.156.133.98:2402093,128,False,0,64,0.001,1,16,0,32,0.01,1,1,2018,0,199,10867.8,-0.795968


2024-06-24 13:23:52,864	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', 'd0c36a7a')}
2024-06-24 13:48:48,626	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '77ade844')}
2024-06-24 13:56:51,061	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '6269ca81')}
2024-06-24 13:57:13,378	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '80f16ada')}
2024-06-24 14:07:02,687	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '93632d75')}
2024-06-24 14:10:16,950	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}
2024-06-24

{'body_hidden_size': 64, 'body_num_layer': 1, 'body_dropout': 0, 'bidirectional': False, 'body_output': 16, 'body_learning_rate': 0.001, 'head_hidden_size': 32, 'head_num_layer': 1, 'head_dropout': 0.0, 'head_learning_rate': 0.01, 'spec_multiplier': 1, 'batch_size': 128, 'test_year': 2004, 'warmup': 0}
Best configuration saved to: /data/Hydra_Work/Tuning/Config_Text/Hydral_Model_best_config.txt


In [None]:
# Collect the tuning results returned by tuner.fit() into a DataFrame for inspection.
results_df = results.get_dataframe()

In [None]:
# Trials whose relative validation loss is below -0.75, with the body size they used.
results_df.loc[results_df['val_loss'] < -0.75, ['val_loss', 'config/body_hidden_size']]

In [None]:
# Display the full results table.
results_df

# General Model with flags


In [33]:

LR = 1e-3

# Widths (second axis) of the static-variable table and of one seasonal-forecast table.
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]

# Optional extra forcing features; all currently disabled (previous sizes kept for reference).
History_Fourier_in_forcings = 0     # was 2*3*(6 - 1)
History_Statistics_in_forcings = 0  # was 5*2
Climate_guess = 0                   # was 3

# Width of the forecast-side input: every enabled feature group plus 3 further inputs.
# NOTE(review): what the trailing "+ 3" stands for is not visible in this cell - confirm.
forecast_input_size = sum([
    forecast_size,
    static_size,
    History_Fourier_in_forcings,
    History_Statistics_in_forcings,
    Climate_guess,
    3,
])

output_size = 3
head_hidden_size = 64
head_num_layers = 3

hindcast_input_size = 23 #8 for no flow without new data, 26 for flow with new, 25 for no flow with new


In [34]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters (read as globals by train_model_general_with_flag)
total_epochs = 200
n_epochs = 1 # Epochs between tests
group_lengths = [1] # 
copies = 1

# parameters to tune
# I tuned to 128,2,0.4,False,1e-3 
# Every list currently holds a single value, so the grid only varies `test_year`.
hidden_sizes = [64]
num_layers = [2]
dropout = [0.1] # 0.2 0.4 both equal
bidirectional =  [False]
learning_rate = [1e-3]
# (a scalar `batch_size = 64` used to be assigned above and was overwritten here unused)
batch_size = [128] 
p = [0.5]

# Fold years as plain Python ints. Passing numpy integers (list(np.arange(...))) makes Ray
# replace the value with an object-reference placeholder, which is what produces the
# "Removed the following hyperparameter values when logging to tensorboard:
# {'test_year': ('__ref_ph', ...)}" messages and drops test_year from the logged hparams.
test_years = list(range(2000, 2024, 2))

config_space = {
    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate),
    "batch_size": tune.grid_search(batch_size),
    "p": tune.grid_search(p),
    'test_year': tune.grid_search(test_years)
    
}


# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/'

In [35]:
def train_model_general_with_flag(config):
    """Train the general LSTM (with flags) for one cross-validation fold.

    Runs as a Ray Tune trainable body: datasets are fetched from the Ray object store,
    the model is trained for `total_epochs` epochs and, after every epoch, the best
    validation score seen so far in this trial is reported as `val_loss`.

    The fold is selected by `config['test_year']`. The two entries that precede the test
    year in the year list are used for validation (negative indexing makes this wrap
    around to the end of the list for the first two folds); the remaining years are used
    for training. The test year itself is held out and not touched here.

    The score is the validation loss relative to climatology,
    `(val_loss - climate_loss) / climate_loss`, so lower is better and negative values
    beat climatology.

    Side effects: whenever an epoch improves on the value stored in the fold's loss file,
    `models[0]` is saved under
    `/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/General_LSTM_Model/`.
    The loss file is only initialised when missing, so the comparison is against the best
    score ever written for that fold, not only against this trial.

    Args:
        config: Ray Tune trial configuration. Keys read here: `test_year`, `hidden_size`,
            `num_layers`, `dropout`, `bidirectional`, `learning_rate`, `batch_size`, `p`.

    Returns:
        The lowest relative validation loss reached during this trial.
    """
    All_Dates = ray.get(All_Dates_id)

    # --- Fold definition --------------------------------------------------------------
    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    test_index = years.index(test_year)
    val_years = [years[test_index - 1], years[test_index - 2]]
    train_years = [year for year in years if year not in [test_year] + val_years]

    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]

    # --- Shared datasets from the Ray object store -------------------------------------
    era5 = ray.get(era5_id)
    daily_flow = ray.get(daily_flow_id)
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    copies = 1

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # --- Output locations ---------------------------------------------------------------
    save_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/General_LSTM_Model/General_LSTM_With_Flags.pth'
    loss_path = f'/data/Hydra_Work/3_Day_No_Forecast_Validation_Models/{test_year}/General_LSTM_Model/General_LSTM_With_Flags_loss.txt'

    # Make sure the fold's folder exists so neither the loss file nor the checkpoint
    # fails with FileNotFoundError part-way through a long run.
    os.makedirs(os.path.dirname(loss_path), exist_ok=True)

    # Running best for this trial; also the sentinel written to a fresh loss file.
    val_loss = 1000

    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)

    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)

    batch_size = config["batch_size"]
    p = config["p"]
    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

            # Need to fix the outputs of No_Body_Model_Run
            # Training pass; its climatology loss is kept under its own name so the
            # validation pass below no longer overwrites it.
            train_losses[copy], Train_Climate_Loss = Indicator_LSTM_Run(Train_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False, p = p)
            # Validation pass (Train_Mode=False) on the two validation years.
            epoch_val_losses[copy], Climate_Loss = Indicator_LSTM_Run(Val_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False, p = p)

        # Training loss against the TRAINING climatology (previously the validation
        # climatology was subtracted because the variable had been overwritten).
        loss = np.mean(list(train_losses.values())) - Train_Climate_Loss

        # Validation loss relative to the validation climatology.
        candidate_val_loss = ((np.mean(list(epoch_val_losses.values())).mean() - Climate_Loss)[0])/np.mean(Climate_Loss)
        val_loss = np.min([val_loss, candidate_val_loss ])

        # Check best loss so far for this model
        with open(loss_path, 'r') as file:
            # Read the entire contents of the file
            Overall_Best_Val_Loss = float(file.read())

        # Checkpoint only when THIS epoch's weights beat the stored best. Comparing the
        # running minimum against a value stored with 6 decimals ('%f') could stay true on
        # every later epoch (whenever rounding pushed the stored number above the true
        # best) and silently overwrite the best checkpoint with worse, later weights.
        if candidate_val_loss < Overall_Best_Val_Loss:
            torch.save(models[0], save_path)

            with open(loss_path, 'w') as f:
                # Full precision so the value read back equals the value compared.
                f.write(repr(float(candidate_val_loss)))

        # The tuner sees the best-so-far score, not the per-epoch score.
        ray.train.report({'val_loss' : val_loss})

        losses.append(loss)
        val_losses.append(candidate_val_loss)
        #print(val_losses)
        #print(candidate_val_loss)
        #print(loss/np.mean(Climate_Loss))
    return val_loss

In [37]:
from ray import train, tune
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper
from ray.tune.search.optuna import OptunaSearch
import optuna


# Start from a clean Ray instance whose workers can import the competition helper modules.
ray.shutdown()
ray.init(runtime_env={"env_vars": {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/'}})

# Publish the shared datasets to the object store once; the training function fetches
# them through these handles with ray.get(...).
(
    All_Dates_id,
    era5_id,
    daily_flow_id,
    climatological_flows_id,
    climate_indices_id,
    seasonal_forecasts_id,
    Static_variables_id,
) = map(
    ray.put,
    (
        All_Dates,
        era5,
        daily_flow,
        climatological_flows,
        climate_indices,
        seasonal_forecasts,
        static_variables,
    ),
)

# Per-trial plateau stopper on `val_loss`: looks at the last 20 reported results and never
# stops a trial before 50 iterations.
# NOTE(review): the training function reports the best-so-far loss, which changes little
# between epochs; in the recorded run most trials terminate at iteration 50, i.e. as soon
# as the grace period ends - confirm this is the intended stopping behaviour.
plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    mode="min",
    num_results=20,
    grace_period=50,
)


def objective(config):
    """Ray Tune trainable for the general LSTM with flags.

    Delegates to `train_model_general_with_flag`, which reports `val_loss` after every
    epoch and selects the device itself, then hands the final best score back to Tune.

    Args:
        config: trial configuration sampled from `config_space`.

    Returns:
        dict with the single key `val_loss` (best relative validation loss of the trial).
    """
    score = train_model_general_with_flag(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


# Alternatives kept for reference (Optuna search / ASHA scheduling), currently disabled:
#, search_alg = optuna_search
# optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
# tune_config = tune.TuneConfig(scheduler=asha_scheduler)
# Active setup: the only early-stopping rule is the plateau stopper defined above.
run_config=train.RunConfig(stop= plateau_stopper)



2024-06-24 14:23:01,054	INFO worker.py:1724 -- Started a local Ray instance.


In [39]:
# Number of trials meant to share the machine at once (used to split CPU/GPU per trial).
runs = 12

file_path = "/data/Hydra_Work/Tuning/Config_Text/General_Model_best_config.txt"
# Create the output folder up front: a missing directory should fail here, not after the
# long tuner.fit() call below has finished.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Without Optuna
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/runs , "gpu": 1/runs }), param_space=config_space, run_config = run_config) 
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config) 

results = tuner.fit()
# try get_best_checkpoint, or change val to be maximum of current val_loss and previous ones
# NOTE(review): `test_year` is part of the grid, so with all other hyperparameters fixed
# the "best" config mainly identifies the fold with the lowest validation loss.
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)

# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)

0,1
Current time:,2024-06-24 15:20:25
Running for:,00:27:10.27
Memory:,75.1/125.9 GiB

Trial name,status,loc,batch_size,bidirectional,dropout,hidden_size,learning_rate,num_layers,p,test_year,iter,total time (s),val_loss
objective_7c109_00005,RUNNING,136.156.133.98:2412528,128,False,0.1,64,0.001,2,0.5,2010,53,1614.82,-0.758524
objective_7c109_00008,RUNNING,136.156.133.98:2413041,128,False,0.1,64,0.001,2,0.5,2016,51,1610.74,-0.803969
objective_7c109_00011,RUNNING,136.156.133.98:2413061,128,False,0.1,64,0.001,2,0.5,2022,53,1625.5,-0.82514
objective_7c109_00000,TERMINATED,136.156.133.98:2412523,128,False,0.1,64,0.001,2,0.5,2000,50,1347.31,-0.789365
objective_7c109_00001,TERMINATED,136.156.133.98:2412524,128,False,0.1,64,0.001,2,0.5,2002,50,1364.0,-0.807375
objective_7c109_00002,TERMINATED,136.156.133.98:2412525,128,False,0.1,64,0.001,2,0.5,2004,50,1577.62,-0.76943
objective_7c109_00003,TERMINATED,136.156.133.98:2412526,128,False,0.1,64,0.001,2,0.5,2006,50,1589.16,-0.76477
objective_7c109_00004,TERMINATED,136.156.133.98:2412527,128,False,0.1,64,0.001,2,0.5,2008,50,1573.04,-0.776447
objective_7c109_00006,TERMINATED,136.156.133.98:2412785,128,False,0.1,64,0.001,2,0.5,2012,53,1456.31,-0.782534
objective_7c109_00007,TERMINATED,136.156.133.98:2412786,128,False,0.1,64,0.001,2,0.5,2014,50,1583.95,-0.770769


2024-06-24 15:15:45,577	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '52467d93')}
2024-06-24 15:16:02,272	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '6269ca81')}
2024-06-24 15:17:35,643	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '34b3c1e4')}
2024-06-24 15:18:51,897	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '93632d75')}
2024-06-24 15:19:28,544	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', 'd0c36a7a')}
2024-06-24 15:19:31,537	INFO tensorboardx.py:275 -- Removed the following hyperparameter values when logging to tensorboard: {'test_year': ('__ref_ph', '5a92826b')}
2024-06-24

{'hidden_size': 64, 'num_layers': 2, 'dropout': 0.1, 'bidirectional': False, 'learning_rate': 0.001, 'batch_size': 128, 'p': 0.5, 'test_year': 2022}
Best configuration saved to: /data/Hydra_Work/Tuning/Config_Text/General_Model_best_config.txt
