In [1]:
import os
import pickle
import joblib
import pandas as pd
import numpy as np
import random
import itertools

import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

import sys
sys.path.append('/data/Hydra_Work/Competition_Functions') 
from Processing_Functions import process_forecast_date, process_seasonal_forecasts
from Data_Transforming import read_nested_csvs, generate_daily_flow, use_USGS_flow_data, USGS_to_daily_df_yearly

sys.path.append('/data/Hydra_Work/Pipeline_Functions')
from Folder_Work import filter_rows_by_year, csv_dictionary, add_day_of_year_column

sys.path.append('/data/Hydra_Work/Post_Rodeo_Work/ML_Functions.py')
from Full_LSTM_ML_Functions import Specific_Heads, Google_Model_Block, SumPinballLoss, EarlyStopper, Model_Run, No_Body_Model_Run



from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim.lr_scheduler as lr_scheduler





# Making the cross validation set

Cross Validation decisions:
- It looks like I only have 10 years right now, and if the results are good I can keep it that way (justify by independent years)
- Training set of 80% and Validation of 20% is fine, makes sense to make the Validation years adjacent instead of random, probably doesn't matter much but adjacent minimises theyre connection with the years in the training dataset
- This means theres only 5 folds which shouldn't take forever to do 
- There's an issue right now where my validation set is also my test set, how much can I get around this?
- I could test a 70-20-10 set up, from the looks of it there won't be that much loss in performance by reducing the training set by 12%? 
- If I assume the years are independent then it doesn't matter which dates I choose for validation years when I've got a specific testing year
- K -fold cross validation means splitting the data in k chunks and choosing a different chunk for each, p-fold involves choosing all possible combinations of size p for the splits

Structure of the folders:
- Can do Validation_Models/Val_Years/Model/.pth, bs Model/Val_Years/.pth
- I think the first makes more sense, I would realy want to ompare models trained over the same years


Restructuring Current code:
- I want to fit this whole thing into a for loop so I can run it
- Alternatively I can have the validation years as a parameter in the config_space and just let the code run as is
- It would be nice to make the prep section smaller visually, or hidden somewhere else


In [2]:
import sys

def get_env():
    sp = sys.path[1].split("/")
    if "envs" in sp:
        return sp[sp.index("envs") + 1]
    else:
        return ""
    
print(get_env())

Hydra_Code


In [3]:
basins = ['libby_reservoir_inflow',  'owyhee_r_bl_owyhee_dam',  'san_joaquin_river_millerton_reservoir',  'taylor_park_reservoir_inflow',
 'boise_r_nr_boise', 'green_r_bl_howard_a_hanson_dam', 'weber_r_nr_oakley', 'detroit_lake_inflow', 'virgin_r_at_virtin', 'dillon_reservoir_inflow',
 'pueblo_reservoir_inflow', 'hungry_horse_reservoir_inflow', 'stehekin_r_at_stehekin', 'pecos_r_nr_pecos', 'snake_r_nr_heise', 'yampa_r_nr_maybell',
 'colville_r_at_kettle_falls', 'missouri_r_at_toston', 'merced_river_yosemite_at_pohono_bridge', 'animas_r_at_durango','fontenelle_reservoir_inflow', 'boysen_reservoir_inflow']

selected_years = range(2000,2024,2)


base_dir = "/data/Hydra_Work/Scaled_Data"

# Define dictionaries and DataFrames
dictionaries = ['era5', 'seasonal_forecasts', 'daily_flow', 'climatological_flows']

dataframes = ['climate_indices', 'static_variables']

# Function to load dictionaries
def load_dictionaries(base_dir, names):
    loaded_dicts = {}
    for name in names:
        file_path = os.path.join(base_dir, f"{name}.pkl")
        with open(file_path, 'rb') as file:
            locals()[name] = pickle.load(file)
    return locals()

# Function to load DataFrames
def load_dataframes(base_dir, names):
    loaded_dfs = {}
    for name in names:
        file_path = os.path.join(base_dir, f"{name}.pkl")
        locals()[name] = pd.read_pickle(file_path)
    return locals()

saved_dicts = load_dictionaries(base_dir, dictionaries)
saved_dfs = load_dataframes(base_dir, dataframes)

for name in dictionaries:
    locals()[name] = saved_dicts[name]

for name in dataframes:
    locals()[name] = saved_dfs[name]

criterion = SumPinballLoss(quantiles = [0.1, 0.5, 0.9])

basin = 'animas_r_at_durango' 
All_Dates = daily_flow[basin].index[
    ((daily_flow[basin].index.month < 6) | ((daily_flow[basin].index.month == 6) & (daily_flow[basin].index.day < 24))) &
    ((daily_flow[basin].index.year % 2 == 0) | ((daily_flow[basin].index.month > 10) | ((daily_flow[basin].index.month == 10) & (daily_flow[basin].index.day >= 1))))
]
All_Dates = All_Dates[All_Dates.year > 1998]


# Validation Year
Val_Dates = All_Dates[All_Dates.year >= 2020]
All_Dates = All_Dates[All_Dates.year < 2020]

device = torch.device('cuda' if torch.cuda.
                is_available() else 'cpu')

criterion = SumPinballLoss(quantiles = [0.1, 0.5, 0.9])

basin = 'animas_r_at_durango' 
All_Dates = daily_flow[basin].index[
    ((daily_flow[basin].index.month < 6) | ((daily_flow[basin].index.month == 6) & (daily_flow[basin].index.day < 24))) &
    ((daily_flow[basin].index.year % 2 == 0) | ((daily_flow[basin].index.month > 10) | ((daily_flow[basin].index.month == 10) & (daily_flow[basin].index.day >= 1))))
]
All_Dates = All_Dates[All_Dates.year > 1998]


# Validation Year
Val_Dates = All_Dates[All_Dates.year >= 2018]
Val_Dates = Val_Dates[Val_Dates.year <= 2022]
Train_Dates = All_Dates[All_Dates.year == 2022]

seed = 42 ; torch.manual_seed(seed) ; random.seed(seed) ; np.random.seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

days  = 90




# Tuning individual basins

In [4]:
LR = 1e-3
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 0 #3 # THis is about climatology, not climate indices
History_Statistics_in_forcings = 0  #5*2

forecast_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3
hindcast_input_size = 9 # 17 if we include climate indices



In [5]:
Retrain_Basins = basins
for basin in basins:
    loss_path = f'/data/Hydra_Work/Tuning/Week_Ahead_Models_V2/Specific_Week_Ahead_Models/{basin}_specific_loss.txt'
    
    with open(loss_path, 'r') as file:
    # Read the entire contents of the file
        Overall_Best_Val_Loss = float(file.read())
    
    if Overall_Best_Val_Loss < -0.05:
        Retrain_Basins = list(set(Retrain_Basins) - set([basin]))

In [6]:
len(Retrain_Basins)

0

In [9]:
# Do we want hindcast and forecast num-layers to be different?
def define_models(hindcast_input_size, forecast_input_size, hidden_size, num_layers, dropout, bidirectional, learning_rate, copies = 3, forecast_output_size = 3, device = device):
    models = {}
    params_to_optimize = {}
    optimizers = {}
    schedulers = {}
    
    hindcast_output_size = forecast_output_size
    for copy in range(copies):
        models[copy] = Google_Model_Block(hindcast_input_size, forecast_input_size, hindcast_output_size, forecast_output_size, hidden_size, num_layers, device, dropout, bidirectional)
        
        models[copy].to(device)
        params_to_optimize[copy] = list(models[copy].parameters())
        # Probably should be doing 1e-2 and 10
        optimizers[copy] = torch.optim.Adam(params_to_optimize[copy], lr= learning_rate, weight_decay = 1e-3)
        schedulers[copy] = lr_scheduler.CosineAnnealingLR(optimizers[copy], T_max = 50)
        #.StepLR(optimizers[copy], 5, gamma=0.5)
        

    return models, params_to_optimize, optimizers, schedulers


In [None]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper
from ray.tune.search.optuna import OptunaSearch
import optuna

# Fixed parameters
total_epochs = 40
n_epochs = 1  # Epochs between tests
group_lengths = [7] #np.arange(180) 7 Day ahead for streamlined version
batch_size = 1
copies = 1

# parameters to tune
hidden_sizes = [128] # 64 converged upon
num_layers =  [1]
dropout = [0.1]
bidirectional = [False] #[True, False]
learning_rate = [1e-3, 1e-4] #[1e-3, 1e-5]


# Set up configuration space
config_space = {

    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate),
    "basin":  tune.grid_search(basins),
    'test_year': tune.grid_search(list(np.arange(2000,2024,2)) )

}




In [None]:
def train_model(config):

    All_Dates = ray.get(All_Dates_id)  

    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]  ]
    train_years = [year for year in years if year not in [test_year] + val_years]
    
    Test_Dates = All_Dates[All_Dates.year == test_year]
    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]

    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    val_loss = 1000

    basin = config["basin"]

    save_path = f'/data/Hydra_Work/Validation_Models/{test_year}/Specific_LSTM_Model/{basin}_specific.pth'
    loss_path = f'/data/Hydra_Work/Validation_Models/{test_year}/Specific_LSTM_Model/{basin}_specific_loss.txt'

    
    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)
    
    copies = 1
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
    
    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)

    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

             # Need to fix the outputs of No_Body_Model_Run
            train_losses[copy], Climate_Loss = No_Body_Model_Run(All_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy], Climate_Loss = No_Body_Model_Run(Val_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values())) - Climate_Loss
        

        candidate_val_loss = ((np.mean( list(epoch_val_losses.values()) ).mean() - Climate_Loss)[0])/np.mean(Climate_Loss)
        val_loss = np.min([val_loss, candidate_val_loss ])
        if candidate_val_loss == val_loss:
             torch.save(models[0], save_path)
             
        
        # Check best loss so far for this model
        with open(loss_path, 'r') as file:
            # Read the entire contents of the file
            Overall_Best_Val_Loss = float(file.read())

        if val_loss < Overall_Best_Val_Loss:
            torch.save(models[0], save_path)

            with open(loss_path, 'w') as f:
                f.write('%f' % val_loss)


        ray.train.report({'val_loss' : val_loss})
        print(candidate_val_loss)
        losses.append(loss)
        val_losses.append(val_loss)


    return val_loss

    


In [None]:
from ray import train, tune


ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)  
Val_Dates_id = ray.put(Val_Dates)  
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(static_variables)


In [None]:
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='val_loss',
    mode='min',
    max_t=100,
    grace_period=20,
    reduction_factor=2,
    brackets=1,
)


plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 5,
    grace_period=20,
    mode="min",
)


In [None]:
# Stehekin gives :True	0.4	64	0.001	3 Even looking at overall min, and for animas r at durango
# T-tests suggests: Bidirectional good, dropout unimportant, 16 bad, 64 vs 128 unimportant. All models that imrpvoed loss wre bidirectional
# Libby seemed to want an single layer
# San Joaqin is just hard, score of 9.4: {'hidden_size': 64, 'num_layers': 1, 'dropout': 0.4, 'bidirectional': False, 'learning_rate': 1e-05}



# At weekly:
# Animas has {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.1, 'bidirectional': False, 'learning_rate': 1e-05}, 64,3,0.1. Results for 64, 1, 0.1, True identical
def objective(config):   

    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    
    #print('Device available is', device)
    

    score = train_model(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}

basin = 'stehekin_r_at_stehekin'

#, search_alg = optuna_search
optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
tune_config = tune.TuneConfig(scheduler=asha_scheduler)
running_tune_config = tune.TuneConfig()

run_config=train.RunConfig(stop= plateau_stopper)

# Note using < 1gb per run stops pylance from crashing I think
# Without Optun
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/11, "gpu": 1/11}), param_space=config_space, tune_config = tune_config, run_config = run_config) 
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config) 

results = tuner.fit()
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)



# Define the file path where you want to save the best configuration
file_path = f"/data/Hydra_Work/Tuning/Config_Text/{basin}_best_config.txt"
# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)


In [None]:
results_df = results.get_dataframe()
results_df[results_df['val_loss'] < -0.7][['val_loss', 'config/basin', 'config/test_year']]

In [None]:
Safe_Basins = list(results_df[results_df['val_loss'] < -0.05]['config/basin'].values)
Retrain_Basins = list(set(basins) - set(Safe_Basins))
Retrain_Basins

In [None]:
from scipy import stats

results_df = results.get_dataframe()
columns_to_drop = ['timestamp', 'checkpoint_dir_name', 'done', 'training_iteration', 
                   'trial_id', 'date', 'time_this_iter_s', 'time_total_s', 'pid', 
                   'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore']

# Drop the columns
results_df.drop(columns=columns_to_drop, inplace=True)

val_loss_bidirectional_true = results_df[results_df['config/num_layers'] == 3]['val_loss']
val_loss_bidirectional_false = results_df[results_df['config/num_layers'] == 1]['val_loss']

# Perform a t-test
t_statistic, p_value = stats.ttest_ind(val_loss_bidirectional_true, val_loss_bidirectional_false)

# Print the results
print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

# Check if the difference in means is statistically significant
alpha = 0.05  # Significance level
if p_value < alpha:
    print("The difference in mean val_loss is statistically significant.")
else:
    print("The difference in mean val_loss is not statistically significant.")

In [None]:
# Loading models
Tuned_Models = {}
for basin in basins:
    Tuned_Models[basin] = torch.load(f'/data/Hydra_Work/Post_Rodeo_Work/Tuned_Single_Models/basin.pth')


# Tuning General Model

In [10]:
LR = 1e-3
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 0 #3
History_Statistics_in_forcings = 0 #5*2

forecast_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3
hindcast_input_size = 8

In [11]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters
total_epochs = 30
n_epochs = 1 # Epochs between tests
group_lengths = [7] # 
batch_size = 1
copies = 2

# parameters to tune
# I tuned to 128,2,0.4,False,1e-3 
hidden_sizes = [128]
num_layers = [1]
dropout = [0.1]
bidirectional =  [False]
learning_rate = [5e-4]

config_space = {
    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate),
    'test_year': tune.grid_search(list(np.arange(2000,2024,2)) )
    
}


# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/'

In [12]:
    years = list(np.arange(2000,2024,2))
    test_year = 2000
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]  ]
    train_years = [year for year in years if year not in [test_year] + val_years]
    
    Test_Dates = All_Dates[All_Dates.year == test_year]
    Val_Dates = [date for date in All_Dates if date.year in val_years]
    Train_Dates = [date for date in All_Dates if date.year in train_years]


In [13]:
val_years

[2022, 2020]

In [14]:
def train_model_general(config):
    
    All_Dates = ray.get(All_Dates_id)  
    
    years = list(np.arange(2000,2024,2))
    test_year = config['test_year']
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]  ]
    train_years = [year for year in years if year not in [test_year] + val_years]
    
    Test_Dates = All_Dates[All_Dates.year == test_year]
    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]
    
    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    copies = 1
    
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
    
    save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_LSTM_No_Flow_Model/General_LSTM.pth'
    loss_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_LSTM_No_Flow_Model/General_LSTM_loss.txt'

    val_loss = 1000
    
    
    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)
    
  
    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)

    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

             # Need to fix the outputs of No_Body_Model_Run
            train_losses[copy], Climate_Loss = No_Body_Model_Run(Train_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy], Climate_Loss = No_Body_Model_Run(Val_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values())) - Climate_Loss


        candidate_val_loss = ((np.mean(list(epoch_val_losses.values())).mean() - Climate_Loss)[0])/np.mean(Climate_Loss)
        val_loss = np.min([val_loss, candidate_val_loss ])
        
        # Check best loss so far for this model
        with open(loss_path, 'r') as file:
            # Read the entire contents of the file
            Overall_Best_Val_Loss = float(file.read())

        if val_loss < Overall_Best_Val_Loss:
            torch.save(models[0], save_path)

            with open(loss_path, 'w') as f:
                f.write('%f' % val_loss)

            
               
        ray.train.report({'val_loss' : val_loss})

        losses.append(loss)
        val_losses.append(val_loss)


    return val_loss


In [15]:
from ray import train, tune



ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)  
Val_Dates_id = ray.put(Val_Dates)  
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(static_variables)

2024-05-30 07:10:07,200	INFO worker.py:1724 -- Started a local Ray instance.


In [16]:
# asha_scheduler = ASHAScheduler(
#     time_attr='training_iteration',
#     metric='val_loss',
#     mode='min',
#     max_t=100,
#     grace_period=20,
#     reduction_factor=2,
#     brackets=1,
# )


plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 5,
    grace_period=30,
    mode="min",
)


In [17]:
# {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.1, 'bidirectional': True, 'learning_rate': 0.001}
# 7 Days:  128	2	0.4	False	0.001
def objective(config):  
    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    
    #print('Device available is', device)
    

    score = train_model_general(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


#, search_alg = optuna_search
# optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
# tune_config = tune.TuneConfig(scheduler=asha_scheduler)
run_config=train.RunConfig(stop= plateau_stopper)

# Without Optuna
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/12, "gpu": 1/12}), param_space=config_space, run_config = run_config) 
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config) 

results = tuner.fit()
# try get_best_checkpoint, or change val to be maximum of current val_loss and previous ones
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)
file_path = f"/data/Hydra_Work/Tuning/Config_Text/General_Model_best_config.txt"

# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)


0,1
Current time:,2024-05-30 07:14:41
Running for:,00:04:32.82
Memory:,76.7/125.9 GiB

Trial name,status,loc,bidirectional,dropout,hidden_size,learning_rate,num_layers,test_year,iter,total time (s),val_loss
objective_a5dbc_00000,RUNNING,136.156.133.98:593713,False,0.1,128,0.0005,1,2000,2,189.931,1.14647
objective_a5dbc_00001,RUNNING,136.156.133.98:593715,False,0.1,128,0.0005,1,2002,3,267.164,0.807647
objective_a5dbc_00002,RUNNING,136.156.133.98:593717,False,0.1,128,0.0005,1,2004,2,193.322,0.571646
objective_a5dbc_00003,RUNNING,136.156.133.98:593716,False,0.1,128,0.0005,1,2006,2,194.116,0.461234
objective_a5dbc_00004,RUNNING,136.156.133.98:593726,False,0.1,128,0.0005,1,2008,2,193.657,0.560798
objective_a5dbc_00005,RUNNING,136.156.133.98:593727,False,0.1,128,0.0005,1,2010,2,194.488,0.956412
objective_a5dbc_00006,RUNNING,136.156.133.98:593728,False,0.1,128,0.0005,1,2012,3,265.944,0.801351
objective_a5dbc_00007,RUNNING,136.156.133.98:593731,False,0.1,128,0.0005,1,2014,2,188.171,0.856294
objective_a5dbc_00008,RUNNING,136.156.133.98:593738,False,0.1,128,0.0005,1,2016,3,265.266,0.864256
objective_a5dbc_00009,RUNNING,136.156.133.98:593739,False,0.1,128,0.0005,1,2018,2,193.969,1.40002




2024-05-30 07:14:51,462	INFO tune.py:1042 -- Total run time: 282.92 seconds (272.81 seconds for the tuning loop).
Resume experiment with: Tuner.restore(path="/home/gbmc/ray_results/objective_2024-05-30_07-10-08", trainable=...)


{'hidden_size': 128, 'num_layers': 1, 'dropout': 0.1, 'bidirectional': False, 'learning_rate': 0.0005, 'test_year': 2006}
Best configuration saved to: /data/Hydra_Work/Tuning/Config_Text/General_Model_best_config.txt


In [None]:
results_df = results.get_dataframe()
results_df[results_df['val_loss'] < -0.15] 

In [None]:
General_Model = torch.load('/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/General_model.pth')



# Tuning Hydra Model

In [31]:
def define_models_hydra(body_hindcast_input_size, body_forecast_input_size, body_output_size, body_hidden_size, body_num_layers, body_dropout,
                        head_hidden_size, head_num_layers, head_forecast_output_size, head_dropout, bidirectional, basins,
                        learning_rate_general_head, learning_rate_head, learning_rate_body, LR = 1e-3, 
                        additional_specific_head_hindcast_input_size = 1, additional_specific_head_forecast_input_size = 0,
                        copies=1, device=None):
    Hydra_Bodys = {}
    Basin_Heads = {}
    General_Heads = {}   
    general_optimizers = {}
    optimizers = {}
    schedulers = {}
    
    body_forecast_output_size = body_output_size
    body_hindcast_output_size = body_output_size
    
    # Define head hindcast size as head-forecast for simplicty
    head_hindcast_output_size = head_forecast_output_size
    specific_head_hindcast_output_size = head_forecast_output_size
    specific_head_forecast_output_size = head_forecast_output_size
    specific_head_hidden_size = head_hidden_size
    specific_head_num_layers = head_num_layers
    
    # Head takes Body as inputs
    #head_hindcast_input_size = body_hindcast_input_size 
    head_hindcast_input_size = body_hindcast_output_size
    head_forecast_input_size = body_forecast_output_size
    
    # Specific input size
    specific_head_hindcast_input_size = head_hindcast_input_size + additional_specific_head_hindcast_input_size
    specific_head_forecast_input_size = head_forecast_input_size + additional_specific_head_forecast_input_size
    
    for copy in range(copies):
        Hydra_Bodys[copy] = Google_Model_Block(body_hindcast_input_size, body_forecast_input_size, body_hindcast_output_size, body_forecast_output_size, body_hidden_size, body_num_layers, device, body_dropout, bidirectional)
        General_Heads[copy] = Google_Model_Block(head_hindcast_input_size, head_forecast_input_size, head_hindcast_output_size, head_forecast_output_size, head_hidden_size, head_num_layers, device, head_dropout, bidirectional)
        Basin_Heads[copy] = Specific_Heads(basins, specific_head_hindcast_input_size, specific_head_forecast_input_size, specific_head_hindcast_output_size, specific_head_forecast_output_size, specific_head_hidden_size, specific_head_num_layers, device, head_dropout, bidirectional)


        specific_head_parameters = list()
        for basin, model in Basin_Heads[copy].items():
            specific_head_parameters += list(model.parameters())

        optimizers[copy] = torch.optim.Adam(
        # Extra LR is the global learning rate, not really important
        [
            {"params": General_Heads[copy].parameters(), "lr": learning_rate_general_head},
            {"params": specific_head_parameters, "lr": learning_rate_head},
            {"params": Hydra_Bodys[copy].parameters(), "lr": learning_rate_body},
        ],
        lr=LR, weight_decay = 1e-4 ) #1e-4 good so far, 3 not so food

        general_optimizers[copy] = torch.optim.SGD(
        # Extra LR is the global learning rate, not really important
        [
            {"params": General_Heads[copy].parameters(), "lr": learning_rate_general_head},
            {"params": Hydra_Bodys[copy].parameters(), "lr": learning_rate_body},
        ],
        lr=LR, )
        schedulers[copy] = lr_scheduler.StepLR(optimizers[copy], 1, gamma=0.99) #.CosineAnnealingLR(optimizers[copy], T_max= 10, eta_min= 1e-4,)
        #.StepLR(optimizers[copy], 1, gamma=0.99) #
    return Hydra_Bodys, General_Heads, Basin_Heads, optimizers, schedulers, general_optimizers 

In [32]:
LR = 1e-3
static_size = np.shape(static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 0 #3
History_Statistics_in_forcings = 0 # 5*2

forecast_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3
body_hindcast_input_size = 8
body_forecast_input_size = forecast_input_size


Overall_Best_Val_Loss = 999

In [51]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters
total_epochs = 300
n_epochs = 1 # Epochs between tests
group_lengths = [7] #np.arange(180)
batch_size = 1
copies = 1
head_output_size = 3

# parameters to tune
# chose 128, 2, 0.1, 1e-3, 6, 32, 1, 0.4, 1e-3
body_hidden_sizes =  [128]
body_num_layers = [1]
body_dropouts = [0.0] #[0.1, 0.4]
body_learning_rates = [1e-3]
body_outputs = [4] # Say hindcast and forecasts have same outputrs body_hindcast_output_size


head_hidden_sizes = [32]
head_num_layers = [1]
head_dropouts = [0.0] #[0.1, 0.4, 0.7]
head_learning_rates = [1e-3, 5e-3]
LR = 1e-3
bidirectionals = [False]
spec_multiplier = [1, 10]

config_space = {
    "body_hidden_size": tune.grid_search(body_hidden_sizes),
    "body_num_layer": tune.grid_search(body_num_layers),
    "body_dropout": tune.grid_search(body_dropouts),
    "bidirectional": tune.grid_search(bidirectionals),
    "body_output": tune.grid_search(body_outputs),
    "body_learning_rate": tune.grid_search(body_learning_rates),
    "head_hidden_size": tune.grid_search(head_hidden_sizes),
    "head_num_layer": tune.grid_search(head_num_layers),
    "head_dropout": tune.grid_search(head_dropouts),
    "head_learning_rate": tune.grid_search(head_learning_rates),
    "spec_multiplier": tune.grid_search(spec_multiplier)
    #'test_year': tune.grid_search(list(np.arange(2000,2024,2)) )

    #"general_head_learning_rate": tune.grid_search(head_learning_rates),
}

# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_Hydra_Model/'



In [52]:
def train_model_hydra(config):

    All_Dates = ray.get(All_Dates_id)  
    
    years = list(np.arange(2000,2024,2))
    test_year = 2022 #config['test_year']
    val_years = [years[years.index(test_year)-1], years[years.index(test_year)-2]  ]
    train_years = [year for year in years if year not in [test_year] + val_years]
    
    Test_Dates = All_Dates[All_Dates.year == test_year]
    Val_Dates = All_Dates[All_Dates.year.isin(val_years)]
    Train_Dates = All_Dates[All_Dates.year.isin(train_years)]

    
    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)  
  

    body_save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_Body_LSTM.pth'
    head_save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_Head_LSTM.pth'
    basin_heads_save_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/Basin_Head_Model'
    
    loss_path = f'/data/Hydra_Work/No_Forecast_Validation_Models/{test_year}/General_Head_Model/Hydra_LSTM_loss.txt'

    val_loss = 1000
    
    
    if not os.path.exists(loss_path):
        # If the file does not exist, create it and write val_loss to it
        with open(loss_path, 'w') as file:
            file.write('%f' % val_loss)


    copies = 1
    warmup = 6
    best_val_loss = 999
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
   

    general_head_learning_rate = config['body_learning_rate']
    Hydra_Bodys, General_Hydra_Heads, model_heads, optimizers, schedulers, general_optimizers  = define_models_hydra(body_hindcast_input_size, body_forecast_input_size, config['body_output'],
                                config['body_hidden_size'], config['body_num_layer'], config['body_dropout'], 
                                config['head_hidden_size'], config['head_num_layer'], 3, config['head_dropout'], config['bidirectional'], basins,
                                general_head_learning_rate, config['head_learning_rate'], config['body_learning_rate'], LR, device = device
                                )
     
    # Replace with already existing models 
    # General_Hydra_Heads = [torch.load(head_save_path)]
    # Hydra_Bodys = [torch.load(body_save_path)]
    # Specific_Heads = {}
    # for basin in basins:
    #     Specific_Heads[f'{basin}'] = torch.load(f"{basin_heads_save_path}/{basin}.path")
    # models_heads = [Specific_Heads]
    
    # optimizers = {}    
    # specific_head_parameters = list()
    # for basin, model in model_heads[0].items():
    #     specific_head_parameters += list(model.parameters())

    # optimizers[0] = torch.optim.Adam(
    # # Extra LR is the global learning rate, not really important
    # [
    #     {"params": General_Hydra_Heads[0].parameters(), "lr": config['body_learning_rate']},
    #     {"params": specific_head_parameters, "lr": config['body_learning_rate']},
    #     {"params": Hydra_Bodys[0].parameters(), "lr": config['body_learning_rate']},
    # ],
    # lr=LR, weight_decay = 1e-3 )
    # schedulers[0] = lr_scheduler.CosineAnnealingLR(optimizers[0], T_max= 100)


    
                                                
    general_losses, specific_losses, general_val_losses, specific_val_losses, val_losses = [], [], [], [], []

    # Initialise, with dummy scheduler
    for copy in range(copies):
        # Initialise
        dummy_scheduler = lr_scheduler.StepLR(optimizers[copy],step_size = warmup, gamma = 0.8)

        Model_Run(All_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
            Static_variables, general_optimizers[copy], dummy_scheduler, criterion, early_stopper= None, n_epochs= warmup,
            batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, feed_forcing = False)

            
    for epoch in range(total_epochs):
        train_general_losses = {}
        train_specific_losses = {}
        epoch_val_general_losses = {}
        epoch_val_specific_losses = {}
        climate_losses = {}
        
        for copy in range(copies):
                        

            # Full Training
            train_general_losses[copy], train_specific_losses[copy], climate_losses[copy] = Model_Run(Train_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs= n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, feed_forcing = False, spec_multiplier = config["spec_multiplier"])
            epoch_val_general_losses[copy], epoch_val_specific_losses[copy], climate_losses[copy] = Model_Run(Val_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs= n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, feed_forcing = False)

        general_loss = np.mean(list(train_general_losses.values()))
        specific_loss = np.mean(list(train_specific_losses.values()))
        climate_loss = np.mean(list(climate_losses.values()))
        
        epoch_val_general_loss = np.mean(list(epoch_val_general_losses.values())).mean()
        epoch_val_specific_loss = np.mean(list(epoch_val_specific_losses.values())).mean()
        
        
        general_losses.append(general_loss)
        specific_losses.append(specific_loss)
        specific_val_losses.append(epoch_val_specific_loss)
        general_val_losses.append(epoch_val_general_loss)

        val_loss = 0.5*(epoch_val_general_loss + epoch_val_specific_loss)
        
        candidate_val_loss = ((val_loss.mean() - climate_loss))/np.mean(climate_loss)
        best_val_loss = np.min([best_val_loss, candidate_val_loss ])
         
        with open(loss_path, 'r') as file:
            # Read the entire contents of the file
            Overall_Best_Val_Loss = float(file.read())

        if best_val_loss < Overall_Best_Val_Loss:
            with open(loss_path, 'w') as f:
                f.write('%f' % best_val_loss)

            torch.save(Hydra_Bodys[0], body_save_path)
            torch.save(General_Hydra_Heads[0], head_save_path)
            for basin in basins:
                torch.save(model_heads[0][basin], f"{basin_heads_save_path}/{basin}.path")
                
            
               
        ray.train.report({'val_loss' : best_val_loss})
        #print('Validation Loss', candidate_val_loss)
        print('Training Loss', 1 - (specific_loss / general_loss) )
        val_losses.append(best_val_loss)


    return best_val_loss



In [53]:
from ray import train, tune


ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)  
Val_Dates_id = ray.put(Val_Dates)  
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(static_variables)


2024-05-30 15:44:20,375	INFO worker.py:1724 -- Started a local Ray instance.


In [54]:
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='val_loss',
    mode='min',
    max_t=100,
    grace_period=20,
    reduction_factor=3,
    brackets=1,
)


plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 20,
    grace_period=20,
    mode="min",
)


In [55]:
runs_per_iteration = 4
def objective(config):  
    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    

    score = train_model_hydra(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


run_config=train.RunConfig(stop= plateau_stopper)
# Can use fractions of GPU
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 15/runs_per_iteration, "gpu": 1/(runs_per_iteration)}), param_space=config_space, run_config = run_config) 

results = tuner.fit()
best_config = results.get_best_result(metric="val_loss", mode="min").config
print(best_config)
file_path = f"/data/Hydra_Work/Tuning/Config_Text/Hydral_Model_best_config.txt"

# Open the file in write mode and save the configuration
with open(file_path, "w") as f:
    f.write(str(best_config))

print("Best configuration saved to:", file_path)

0,1
Current time:,2024-05-30 16:24:38
Running for:,00:40:12.68
Memory:,48.4/125.9 GiB

Trial name,status,loc,bidirectional,body_dropout,body_hidden_size,body_learning_rate,body_num_layer,body_output,head_dropout,head_hidden_size,head_learning_rate,head_num_layer,spec_multiplier,iter,total time (s),val_loss
objective_7e743_00000,RUNNING,136.156.133.98:624661,False,0,128,0.001,1,4,0,32,0.001,1,1,36,2390.52,0.162531
objective_7e743_00001,RUNNING,136.156.133.98:624662,False,0,128,0.001,1,4,0,32,0.005,1,1,37,2402.31,-0.01856
objective_7e743_00002,RUNNING,136.156.133.98:624663,False,0,128,0.001,1,4,0,32,0.001,1,10,36,2379.18,0.249431
objective_7e743_00003,RUNNING,136.156.133.98:624664,False,0,128,0.001,1,4,0,32,0.005,1,10,37,2393.87,-0.0375056




[36m(objective pid=624664)[0m Training Loss 6.659820812810651
[36m(objective pid=624661)[0m Training Loss 7.21976998369408[32m [repeated 2x across cluster][0m
[36m(objective pid=624663)[0m Training Loss 7.076786775571157
[36m(objective pid=624664)[0m Training Loss 5.945127211624957
[36m(objective pid=624661)[0m Training Loss 6.20652062348182[32m [repeated 2x across cluster][0m
[36m(objective pid=624663)[0m Training Loss 6.654894722823616
[36m(objective pid=624664)[0m Training Loss 5.725508874508699
[36m(objective pid=624661)[0m Training Loss 5.801343070092966[32m [repeated 2x across cluster][0m
[36m(objective pid=624663)[0m Training Loss 6.172613645044768
[36m(objective pid=624664)[0m Training Loss 5.124006579852385
[36m(objective pid=624661)[0m Training Loss 5.7735241522015395[32m [repeated 2x across cluster][0m
[36m(objective pid=624663)[0m Training Loss 6.034910353437721
[36m(objective pid=624664)[0m Training Loss 4.348406151512117
[36m(objective p

In [None]:
results_df = results.get_dataframe()

In [None]:
results_df[results_df['val_loss'] < -0.1]#[['val_loss', 'config/body_output']]