In [1]:
import os
import joblib
import pandas as pd
import numpy as np
import random
import itertools

import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

import sys
sys.path.append('/data/Hydra_Work/Competition_Functions') 
from Processing_Functions import process_forecast_date, process_seasonal_forecasts
from Data_Transforming import read_nested_csvs, generate_daily_flow, use_USGS_flow_data, USGS_to_daily_df_yearly

sys.path.append('/data/Hydra_Work/Pipeline_Functions')
from Folder_Work import filter_rows_by_year, csv_dictionary, add_day_of_year_column

sys.path.append('/data/Hydra_Work/Post_Rodeo_Work/ML_Functions.py')
from Full_LSTM_ML_Functions import Specific_Heads, Google_Model_Block, SumPinballLoss, EarlyStopper, Model_Run, No_Body_Model_Run



from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim.lr_scheduler as lr_scheduler


In [2]:
# All the prep
monthly_basins = ['animas_r_at_durango', 'boise_r_nr_boise', 'boysen_reservoir_inflow', 'colville_r_at_kettle_falls', 'detroit_lake_inflow', 'dillon_reservoir_inflow',
    'fontenelle_reservoir_inflow', 'green_r_bl_howard_a_hanson_dam', 'hungry_horse_reservoir_inflow', 'libby_reservoir_inflow',
    'missouri_r_at_toston','owyhee_r_bl_owyhee_dam', 'pecos_r_nr_pecos', 'pueblo_reservoir_inflow',
    'ruedi_reservoir_inflow', 'skagit_ross_reservoir', 'snake_r_nr_heise', 'stehekin_r_at_stehekin', 'sweetwater_r_nr_alcova',
    'taylor_park_reservoir_inflow', 'virgin_r_at_virtin', 'weber_r_nr_oakley', 'yampa_r_nr_maybell',
]


USGS_basins = ['animas_r_at_durango', 'boise_r_nr_boise', 'boysen_reservoir_inflow', 'colville_r_at_kettle_falls', 'detroit_lake_inflow', 'dillon_reservoir_inflow',   
    'green_r_bl_howard_a_hanson_dam', 'hungry_horse_reservoir_inflow', 'libby_reservoir_inflow', 'merced_river_yosemite_at_pohono_bridge', 'missouri_r_at_toston',
    'owyhee_r_bl_owyhee_dam', 'pecos_r_nr_pecos', 'pueblo_reservoir_inflow',    'san_joaquin_river_millerton_reservoir', 'snake_r_nr_heise', 'stehekin_r_at_stehekin',
    'sweetwater_r_nr_alcova', 'taylor_park_reservoir_inflow', 'virgin_r_at_virtin', 'weber_r_nr_oakley', 'yampa_r_nr_maybell',
]

basins = list(set(monthly_basins + USGS_basins))


selected_years = range(2000,2024,2)

era5_folder = '/data/Hydra_Work/Rodeo_Data/era5'
era5 = csv_dictionary(era5_folder, basins, years=selected_years)
era5 = add_day_of_year_column(era5)

flow_folder = '/data/Hydra_Work/Rodeo_Data/train_monthly_naturalized_flow'
flow = csv_dictionary(flow_folder, monthly_basins)
flow = filter_rows_by_year(flow, 1998)

climatology_file_path = '/data/Hydra_Work/Rodeo_Data/climate_indices.csv'
climate_indices = pd.read_csv(climatology_file_path)
climate_indices['date'] = pd.to_datetime(climate_indices['date'])
climate_indices.set_index('date', inplace = True)
climate_indices.drop('Unnamed: 0', axis = 1, inplace = True)
climate_indices = climate_indices[~climate_indices.index.duplicated(keep='first')]

root_folder = '/data/Hydra_Work/Rodeo_Data/seasonal_forecasts'
seasonal_forecasts = read_nested_csvs(root_folder)

USGS_flow_folder = '/data/Hydra_Work/Rodeo_Data/USGS_streamflows'
USGS_flow = csv_dictionary(USGS_flow_folder, USGS_basins)

Static_variables = pd.read_csv('/data/Hydra_Work/Rodeo_Data/static_indices.csv', index_col= 'site_id')

# Convert monthly flow values to daily flow estimates
daily_flow = {}

# Iterate through the dictionary and apply generate_daily_flow to each DataFrame
for key, df in flow.items():
    daily_flow[key] = generate_daily_flow(df, persistence_factor=0.7)

# Replacing monhtly data for normalised USGS when available
daily_flow = use_USGS_flow_data(daily_flow, USGS_flow)


normalising_basins = ['san_joaquin_river_millerton_reservoir', 'merced_river_yosemite_at_pohono_bridge', 'detroit_lake_inflow']

for basin in normalising_basins:
    path = f'/data/Hydra_Work/Rodeo_Data/USGS_streamflows/{basin}.csv' 
    normalising_path = f'/data/Hydra_Work/Rodeo_Data/train_yearly/{basin}.csv'
    USGS_to_daily_df_yearly(daily_flow, path, basin, normalising_path)

climate_scaler_filename = '/data/Hydra_Work/Rodeo_Data/scalers/climate_normalization_scaler.save'
climate_scaler = joblib.load(climate_scaler_filename) 
climate_indices = pd.DataFrame(climate_scaler.transform(climate_indices), columns=climate_indices.columns, index=climate_indices.index)

era5_scaler_filename = '/data/Hydra_Work/Rodeo_Data/scalers/era5_scaler.save'
era5_scaler = joblib.load(era5_scaler_filename) 
era5 = {key: pd.DataFrame(era5_scaler.transform(df), columns=df.columns, index=df.index) for key, df in era5.items()}

for basin, df in daily_flow.items(): 
    flow_scaler_filename = f'/data/Hydra_Work/Rodeo_Data/scalers/flows/{basin}_flow_scaler.save'
    flow_scaler = joblib.load(flow_scaler_filename) 
    daily_flow[basin] = pd.DataFrame(flow_scaler.transform(df), columns=df.columns, index=df.index)

seasonal_scaler_filename = "/data/Hydra_Work/Rodeo_Data/scalers/seasonal_scaler.save"
seasonal_scaler = joblib.load(seasonal_scaler_filename)
seasonal_forecasts = {key: pd.DataFrame(seasonal_scaler.transform(df), columns=df.columns, index=df.index ) for key, df in seasonal_forecasts.items()}

static_scaler_filename = '/data/Hydra_Work/Rodeo_Data/scalers/static_scaler.save'
static_scaler = joblib.load(static_scaler_filename) 
Static_variables = pd.DataFrame(static_scaler.transform(Static_variables), columns=Static_variables.columns, index=Static_variables.index)

climatological_flows = {}

for basin, df in daily_flow.items():
    # Extract day of year and flow values
    df['day_of_year'] = df.index.dayofyear

    grouped = df.groupby('day_of_year')['daily_flow'].quantile([0.1, 0.5, 0.9]).unstack(level=1)

    climatological_flows[basin] = pd.DataFrame({
        'day_of_year': grouped.index,
        '10th_percentile_flow': grouped[0.1],
        '50th_percentile_flow': grouped[0.5],
        '90th_percentile_flow': grouped[0.9]
    })
    
    climatological_flows[basin].set_index('day_of_year', inplace=True)

    # Drop the temporary 'day_of_year' column from the original dataframe
    df.drop(columns='day_of_year', inplace=True)

criterion = SumPinballLoss(quantiles = [0.1, 0.5, 0.9])

basin = 'animas_r_at_durango' 
All_Dates = daily_flow[basin].index[
    ((daily_flow[basin].index.month < 6) | ((daily_flow[basin].index.month == 6) & (daily_flow[basin].index.day < 25))) &
    ((daily_flow[basin].index.year % 2 == 0) | ((daily_flow[basin].index.month > 10) | ((daily_flow[basin].index.month == 10) & (daily_flow[basin].index.day >= 1))))
]
All_Dates = All_Dates[All_Dates.year > 1998]


# Validation Year
Val_Dates = All_Dates[All_Dates.year == 2022]
All_Dates = All_Dates[All_Dates.year < 2022]


basin_to_remove = 'sweetwater_r_nr_alcova'

if basin_to_remove in basins:
    basins.remove(basin_to_remove)


seed = 42 ; torch.manual_seed(seed) ; random.seed(seed) ; np.random.seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

days  = 90
hindcast_input_size = 17

LR = 1e-3
static_size = np.shape(Static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 3
History_Statistics_in_forcings = 5*2

head_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
head_output_size = 3

# Be careful of this: Trying to unpickle estimator MinMaxScaler from version 1.3.0 when using version 1.4.1.post1

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- mean_gws_inst
- mean_rtzsm_inst
- mean_sfsm_inst


# Tuning individual basins

In [None]:
LR = 1e-3
static_size = np.shape(Static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 3
History_Statistics_in_forcings = 5*2

forecast_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3
hindcast_input_size = 17



In [None]:
# Do we want hindcast and forecast num-layers to be different?
def define_models(hindcast_input_size, forecast_input_size, hidden_size, num_layers, dropout, bidirectional, learning_rate, copies = 3, forecast_output_size = 3, device = device):
    models = {}
    params_to_optimize = {}
    optimizers = {}
    schedulers = {}
    
    hindcast_output_size = forecast_output_size
    for copy in range(copies):
        models[copy] = Google_Model_Block(hindcast_input_size, forecast_input_size, hindcast_output_size, forecast_output_size, hidden_size, num_layers, device, dropout, bidirectional)
        
        models[copy].to(device)
        params_to_optimize[copy] = list(models[copy].parameters())

        optimizers[copy] = torch.optim.Adam(params_to_optimize[copy], lr= learning_rate, weight_decay = 1e-3)
        schedulers[copy] = lr_scheduler.CosineAnnealingLR(optimizers[copy], T_max=1e4)

    return models, params_to_optimize, optimizers, schedulers

def update_final_parameters(Final_Parameters, basin, min_val_loss_parameters, min_val_loss):
    Final_Parameters['basin'].append(basin)
    Final_Parameters['hidden_size'].append(min_val_loss_parameters[0])
    Final_Parameters['num_layers'].append(min_val_loss_parameters[1])
    Final_Parameters['dropout'].append(min_val_loss_parameters[2])
    Final_Parameters['bidirectional'].append(min_val_loss_parameters[3])
    Final_Parameters['learning_rate'].append(min_val_loss_parameters[4])
    Final_Parameters['val_loss'].append(min_val_loss)


In [None]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper
from ray.tune.search.optuna import OptunaSearch
import optuna

# Fixed parameters
total_epochs = 2
n_epochs = 1  # Epochs between tests
group_lengths = np.arange(180)
batch_size = 1
copies = 3

# parameters to tune
hidden_sizes = [16, 64, 128]
num_layers =  [1,3]
dropout = [0.1, 0.4]
bidirectional = [False, True]
learning_rate = [1e-3, 1e-5]

# Set up configuration space
config_space = {
    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate)
}

def define_optuna_search_space(trial: optuna.Trial):
    trial.suggest_categorical("hidden_size", hidden_sizes)
    trial.suggest_categorical("num_layers", num_layers)
    trial.suggest_categorical("dropout", dropout)
    trial.suggest_categorical("bidirectional", bidirectional)
    trial.suggest_categorical("learning_rate", learning_rate)

optuna_config_space = {
    "hidden_size": tune.lograndint(16,128),
    "num_layers": tune.randint(1,3),
    "dropout": tune.uniform(0.1,0.7),
    "bidirectional": tune.choice(bidirectional),
    "learning_rate": tune.loguniform(1e-3, 1e-5)
}
    



  from .autonotebook import tqdm as notebook_tqdm
2024-04-02 10:18:45,994	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


2024-04-02 10:18:46,083	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [None]:
def train_model(config):

    All_Dates = ray.get(All_Dates_id)  
    Val_Dates = ray.get(Val_Dates_id)  
    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)



    copies = 3
    
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
    
    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)


    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

             # Need to fix the outputs of No_Body_Model_Run
            train_losses[copy] = No_Body_Model_Run(All_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy] = No_Body_Model_Run(Val_Dates, [basin], models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values()))
        val_loss = np.mean(list(epoch_val_losses.values())).mean()

        ray.train.report({'val_loss' : val_loss})

        losses.append(loss)
        val_losses.append(val_loss)


    return val_loss

    


In [None]:
from ray import train, tune



ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)  
Val_Dates_id = ray.put(Val_Dates)  
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(Static_variables)


2024-04-02 10:18:49,194	INFO worker.py:1724 -- Started a local Ray instance.


In [None]:
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='val_loss',
    mode='min',
    max_t=100,
    grace_period=10,
    reduction_factor=3,
    brackets=1,
)


optuna_search = OptunaSearch(
    define_optuna_search_space,
    metric="val_loss",
    mode="min")

plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 4,
    grace_period=10,
    mode="min",
)


[I 2024-04-02 10:18:49,884] A new study created in memory with name: optuna


In [None]:
def objective(config):  
    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    
    print('Device available is', device)
    

    score = train_model(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}

basin = 'stehekin_r_at_stehekin'

#, search_alg = optuna_search
optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
tune_config = tune.TuneConfig(scheduler=asha_scheduler)
run_config=train.RunConfig(stop= plateau_stopper)

# Without Optuna
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space=config_space, tune_config = tune_config, run_config = run_config) 
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config) 

results = tuner.fit()
print(results.get_best_result(metric="val_loss", mode="min").config)

[36m(objective pid=805923)[0m Device available is cuda




[36m(objective pid=805923)[0m Device available is cuda[32m [repeated 16x across cluster][0m




[36m(objective pid=805899)[0m Device available is cuda[32m [repeated 4x across cluster][0m




[36m(objective pid=805902)[0m Device available is cuda[32m [repeated 3x across cluster][0m




[36m(objective pid=805933)[0m Device available is cuda[32m [repeated 2x across cluster][0m
[36m(objective pid=805908)[0m Device available is cuda
[36m(objective pid=805917)[0m Device available is cuda
[36m(objective pid=805837)[0m Device available is cuda[32m [repeated 2x across cluster][0m
[36m(objective pid=805841)[0m Device available is cuda[32m [repeated 2x across cluster][0m
[36m(objective pid=805923)[0m Device available is cuda
[36m(objective pid=805880)[0m Device available is cuda
[36m(objective pid=805899)[0m Device available is cuda[32m [repeated 2x across cluster][0m
[36m(objective pid=805796)[0m Device available is cuda[32m [repeated 2x across cluster][0m
[36m(objective pid=805908)[0m Device available is cuda
[36m(objective pid=805918)[0m Device available is cuda
[36m(objective pid=805933)[0m Device available is cuda[32m [repeated 2x across cluster][0m
[36m(objective pid=805939)[0m Device available is cuda
[36m(objective pid=805825)[0m

2024-04-02 11:12:09,340	INFO tune.py:1042 -- Total run time: 3199.35 seconds (3199.31 seconds for the tuning loop).


{'hidden_size': 16, 'num_layers': 1, 'dropout': 0.4, 'bidirectional': True, 'learning_rate': 0.001}


In [None]:
# Loading models
Tuned_Models = {}
for basin in basins:
    Tuned_Models[basin] = torch.load(f'/data/Hydra_Work/Post_Rodeo_Work/Tuned_Single_Models/basin.pth')


FileNotFoundError: [Errno 2] No such file or directory: '/data/Hydra_Work/Post_Rodeo_Work/Tuned_Single_Models/basin.pth'

# Tuning General Model

In [None]:
LR = 1e-3
static_size = np.shape(Static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 3
History_Statistics_in_forcings = 5*2

input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3

In [None]:
def update_final_parameters_general(Final_Parameters, min_val_loss_parameters, min_val_loss):
    Final_Parameters['hidden_size'].append(min_val_loss_parameters[0])
    Final_Parameters['num_layers'].append(min_val_loss_parameters[1])
    Final_Parameters['dropout'].append(min_val_loss_parameters[2])
    Final_Parameters['bidirectional'].append(min_val_loss_parameters[3])
    Final_Parameters['learning_rate'].append(min_val_loss_parameters[4])
    Final_Parameters['val_loss'].append(min_val_loss)

In [None]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters
total_epochs = 2
n_epochs = 1 # Epochs between tests
group_lengths = np.arange(180)
batch_size = 1
copies = 2

# parameters to tune
# I tuned to 128,2,0.1,False,1e-3 
hidden_sizes = [64, 128, 256]
num_layers = [1,3]
dropout = [0.1, 0.4]
bidirectional =  [False, True]
learning_rate = [1e-2, 1e-3, 1e-5]

config_space = {
    "hidden_size": tune.grid_search(hidden_sizes),
    "num_layers": tune.grid_search(num_layers),
    "dropout": tune.grid_search(dropout),
    "bidirectional": tune.grid_search(bidirectional),
    "learning_rate": tune.grid_search(learning_rate)
}


# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/'

In [None]:
def train_model_general(config):

    All_Dates = ray.get(All_Dates_id)  
    Val_Dates = ray.get(Val_Dates_id)  
    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    copies = 3
    
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
   
    models, params_to_optimize, optimizers, schedulers = define_models(hindcast_input_size, forecast_input_size,
    config["hidden_size"], config["num_layers"], config["dropout"],
    config["bidirectional"], config["learning_rate"], copies=copies, device = device)

    losses, val_losses = [], []

    for epoch in range(total_epochs):

        train_losses = {}
        epoch_val_losses = {}

        for copy in range(copies):

             # Need to fix the outputs of No_Body_Model_Run
            train_losses[copy] = No_Body_Model_Run(All_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, specialised=False)
            epoch_val_losses[copy] = No_Body_Model_Run(Val_Dates, basins, models[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, specialised=False)

        loss = np.mean(list(train_losses.values()))
        val_loss = np.mean(list(epoch_val_losses.values())).mean()

        ray.train.report({'val_loss' : val_loss})

        losses.append(loss)
        val_losses.append(val_loss)

    return val_loss

In [None]:
from ray import train, tune



ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)  
Val_Dates_id = ray.put(Val_Dates)  
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(Static_variables)

In [None]:
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='val_loss',
    mode='min',
    max_t=100,
    grace_period=10,
    reduction_factor=3,
    brackets=1,
)


optuna_search = OptunaSearch(
    define_optuna_search_space,
    metric="val_loss",
    mode="min")

plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 4,
    grace_period=10,
    mode="min",
)


[I 2024-03-25 15:28:47,130] A new study created in memory with name: optuna


In [None]:
def objective(config):  
    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    
    print('Device available is', device)
    

    score = train_model_general(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


#, search_alg = optuna_search
optuna_tune_config = tune.TuneConfig(scheduler=asha_scheduler)
tune_config = tune.TuneConfig(scheduler=asha_scheduler)
run_config=train.RunConfig(stop= plateau_stopper)

# Without Optuna
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space=config_space, tune_config = tune_config, run_config = run_config) 
# With Optuna
#tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space = optuna_config_space, tune_config = optuna_tune_config, run_config = run_config) 

results = tuner.fit()
print(results.get_best_result(metric="val_loss", mode="min").config)

2024-03-25 15:28:53,823	ERROR tune_controller.py:1374 -- Trial task failed for trial objective_5f821_00009
Traceback (most recent call last):
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/worker.py", line 2624, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(NameError): [36mray::ImplicitFunc.train()[39m (pid=533856, ip=136.1

RuntimeError: No best trial found for the given metric: val_loss. This means that no trial has reported this metric, or all values reported for this metric are NaN. To not ignore NaN values, you can set the `filter_nan_and_inf` arg to False.

In [None]:
General_Model = torch.load('/data/Hydra_Work/Post_Rodeo_Work/Tuned_General_Model/General_model.pth')



# Tuning Hydra Model

In [None]:
def define_models_hydra(body_hindcast_input_size, body_forecast_input_size, body_output_size, body_hidden_size, body_num_layers, body_dropout,
                        head_hidden_size, head_num_layers, head_forecast_output_size, head_dropout, bidirectional, basins,
                        learning_rate_general_head, learning_rate_head, learning_rate_body, LR = 1e-3, 
                        additional_specific_head_hindcast_input_size = 1, additional_specific_head_forecast_input_size = 0,
                        copies=3, device=None):
    Hydra_Bodys = {}
    Basin_Heads = {}
    General_Heads = {}    
    params_to_optimize = {}
    optimizers = {}
    schedulers = {}
    
    body_forecast_output_size = body_output_size
    body_hindcast_output_size = body_output_size
    
    # Define head hindcast size as head-forecast for simplicty
    head_hindcast_output_size = head_forecast_output_size
    specific_head_hindcast_output_size = head_forecast_output_size
    specific_head_forecast_output_size = head_forecast_output_size
    specific_head_hidden_size = head_hidden_size
    specific_head_num_layers = head_num_layers
    
    # Head takes Body as inputs
    #head_hindcast_input_size = body_hindcast_input_size 
    head_hindcast_input_size = body_hindcast_output_size
    head_forecast_input_size = body_forecast_output_size
    
    # Specific input size
    specific_head_hindcast_input_size = head_hindcast_input_size + additional_specific_head_hindcast_input_size
    specific_head_forecast_input_size = head_forecast_input_size + additional_specific_head_forecast_input_size
    

    
    for copy in range(copies):
        Hydra_Bodys[copy] = Google_Model_Block(body_hindcast_input_size, body_forecast_input_size, body_hindcast_output_size, body_forecast_output_size, body_hidden_size, body_num_layers, device, body_dropout, bidirectional)
        General_Heads[copy] = Google_Model_Block(head_hindcast_input_size, head_forecast_input_size, head_hindcast_output_size, head_forecast_output_size, head_hidden_size, head_num_layers, device, head_dropout, bidirectional)
        Basin_Heads[copy] = Specific_Heads(basins, specific_head_hindcast_input_size, specific_head_forecast_input_size, specific_head_hindcast_output_size, specific_head_forecast_output_size, specific_head_hidden_size, specific_head_num_layers, device, head_dropout, bidirectional)


        specific_head_parameters = list()
        for basin, model in Basin_Heads[copy].items():
            specific_head_parameters += list(model.parameters())

        optimizers[copy] = torch.optim.Adam(
        # Extra LR is the global learning rate, not really important
        [
            {"params": General_Heads[copy].parameters(), "lr": learning_rate_general_head},
            {"params": specific_head_parameters, "lr": learning_rate_head},
            {"params": Hydra_Bodys[copy].parameters(), "lr": learning_rate_body},
        ],
        lr=LR, )

        schedulers[copy] = lr_scheduler.CosineAnnealingLR(optimizers[copy], T_max=1e4)

    return Hydra_Bodys, General_Heads, Basin_Heads, optimizers, schedulers 



def update_final_parameters_hydra(Final_Parameters, min_val_loss_parameters, min_val_loss):
    # Append body parameters
    Final_Parameters['body_hidden_size'].append(min_val_loss_parameters[0])
    Final_Parameters['body_num_layers'].append(min_val_loss_parameters[1])
    Final_Parameters['body_dropout'].append(min_val_loss_parameters[2])
    Final_Parameters['body_learning_rate'].append(min_val_loss_parameters[3])
    Final_Parameters['body_output'].append(min_val_loss_parameters[4])
    # Append head parameters
    Final_Parameters['head_hidden_size'].append(min_val_loss_parameters[5])
    Final_Parameters['head_num_layers'].append(min_val_loss_parameters[6])
    Final_Parameters['head_dropout'].append(min_val_loss_parameters[7])
    Final_Parameters['head_learning_rate'].append(min_val_loss_parameters[8])
    # Append validation loss
    Final_Parameters['val_loss'].append(min_val_loss)

In [None]:
LR = 1e-3
static_size = np.shape(Static_variables)[1]
forecast_size = np.shape(seasonal_forecasts['american_river_folsom_lake_2000_apr'])[1]
History_Fourier_in_forcings = 0 #2*3*(6 - 1)
Climate_guess = 3
History_Statistics_in_forcings = 5*2

forecast_input_size = forecast_size + static_size + History_Fourier_in_forcings + History_Statistics_in_forcings  + Climate_guess + 3
output_size, head_hidden_size, head_num_layers =  3, 64, 3
body_hindcast_input_size = 16
body_forecast_input_size = forecast_input_size

In [None]:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.stopper import TrialPlateauStopper

# Fixed parameters
total_epochs = 2
n_epochs = 1 # Epochs between tests
group_lengths = np.arange(180)
batch_size = 1
copies = 3
head_output_size = 3

# parameters to tune
# chose 128, 2, 0.1, 1e-3, 6, 32, 1, 0.4, 1e-3
body_hidden_sizes = [64, 128, 256]
body_num_layers =  [1, 3]
body_dropouts = [0.1, 0.4]
body_learning_rates = [1e-3, 1e-5]
body_outputs = [3, 6, 10] # Say hindcast and forecasts have same outputrs body_hindcast_output_size


head_hidden_sizes = [16, 32, 64]
head_num_layers = [1, 3]
head_dropouts = [0.1, 0.4, 0.7]
head_learning_rates = [1e-3, 1e-5]
LR = 1e-3
bidirectionals = [False, True]

config_space = {
    "body_hidden_size": tune.grid_search(body_hidden_sizes),
    "body_num_layer": tune.grid_search(body_num_layers),
    "body_dropout": tune.grid_search(body_dropouts),
    "bidirectional": tune.grid_search(bidirectionals),
    "body_output": tune.grid_search(body_outputs),
    "body_learning_rate": tune.grid_search(body_learning_rates),
    "head_hidden_size": tune.grid_search(head_hidden_sizes),
    "head_num_layer": tune.grid_search(head_num_layers),
    "head_dropout": tune.grid_search(head_dropouts),
    "head_learning_rate": tune.grid_search(head_learning_rates),
    "general_head_learning_rate": tune.grid_search(head_learning_rates),
}

# Places to save info
model_dir = '/data/Hydra_Work/Post_Rodeo_Work/Tuned_Hydra_Model/'



In [None]:
daily_flow['yampa_r_nr_maybell']


Unnamed: 0_level_0,daily_flow
date,Unnamed: 1_level_1
1998-01-01,-0.500497
1998-01-02,-0.502579
1998-01-03,-0.497017
1998-01-04,-0.494009
1998-01-05,-0.493471
...,...
2022-06-26,0.190169
2022-06-27,0.073267
2022-06-28,0.028925
2022-06-29,-0.051698


In [None]:
from ray import train, tune



ray.shutdown()
ray.init(runtime_env = { "env_vars":   {"PYTHONPATH": '/data/Hydra_Work/Competition_Functions/' } } )
         
All_Dates_id = ray.put(All_Dates)  
Val_Dates_id = ray.put(Val_Dates)  
era5_id = ray.put(era5)  
daily_flow_id = ray.put(daily_flow)  
climatological_flows_id = ray.put(climatological_flows)
climate_indices_id = ray.put(climate_indices)
seasonal_forecasts_id = ray.put(seasonal_forecasts)
Static_variables_id = ray.put(Static_variables)

2024-03-28 12:11:42,303	INFO worker.py:1724 -- Started a local Ray instance.


In [None]:
def train_model_hydra(config):

    All_Dates = ray.get(All_Dates_id)  
    Val_Dates = ray.get(Val_Dates_id)  
    era5 = ray.get(era5_id)  
    daily_flow = ray.get(daily_flow_id)  
    climatological_flows = ray.get(climatological_flows_id)
    climate_indices = ray.get(climate_indices_id)
    seasonal_forecasts = ray.get(seasonal_forecasts_id)
    Static_variables = ray.get(Static_variables_id)

    copies = 3
    
    device = torch.device('cuda' if torch.cuda.
                    is_available() else 'cpu')
   

    Hydra_Bodys, General_Hydra_Heads, model_heads, optimizers, schedulers  = define_models_hydra(body_hindcast_input_size, body_forecast_input_size, config['body_output'],
                                config['body_hidden_size'], config['body_num_layer'], config['body_dropout'], 
                                config['head_hidden_size'], config['head_num_layer'], 3, config['head_dropout'], config['bidirectional'], basins,
                                config['general_head_learning_rate'], config['head_learning_rate'], config['body_learning_rate'], LR, device = device
                                )
     

    general_losses, specific_losses, general_val_losses, specific_val_losses = [], [], [], []


    for epoch in range(total_epochs):
        train_general_losses = {}
        train_specific_losses = {}
        epoch_val_general_losses = {}
        epoch_val_specific_losses = {}
        climate_losses = {}
        
        for copy in range(copies):

            train_general_losses[copy], train_specific_losses[copy], climate_losses[copy] = Model_Run(All_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=True, device=device, feed_forcing = False)
            epoch_val_general_losses[copy], epoch_val_specific_losses[copy], climate_losses[copy] = Model_Run(Val_Dates, basins, Hydra_Bodys[copy], General_Hydra_Heads[copy], model_heads[copy], era5, daily_flow, climatological_flows, climate_indices, seasonal_forecasts,
                Static_variables, optimizers[copy], schedulers[copy], criterion, early_stopper= None, n_epochs=n_epochs,
                batch_size=batch_size, group_lengths=group_lengths, Train_Mode=False, device=device, feed_forcing = False)

        general_loss = np.mean(list(train_general_losses.values()))
        specific_loss = np.mean(list(train_specific_losses.values()))
        
        epoch_val_general_loss = np.mean(list(epoch_val_general_losses.values())).mean()
        epoch_val_specific_loss = np.mean(list(epoch_val_specific_losses.values())).mean()
        
        
        general_losses.append(general_loss)
        specific_losses.append(specific_loss)
        specific_val_losses.append(epoch_val_specific_loss)
        general_val_losses.append(epoch_val_general_loss)

        val_loss = 0.5*(epoch_val_general_loss + epoch_val_specific_loss) 
        ray.train.report({'val_loss' : val_loss})

        losses.append(loss)
        val_losses.append(val_loss)

    return val_loss

In [None]:
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='val_loss',
    mode='min',
    max_t=100,
    grace_period=10,
    reduction_factor=3,
    brackets=1,
)


optuna_search = OptunaSearch(
    define_optuna_search_space,
    metric="val_loss",
    mode="min")

plateau_stopper = TrialPlateauStopper(
    metric="val_loss",
    num_results = 4,
    grace_period=10,
    mode="min",
)


[I 2024-03-28 12:11:43,234] A new study created in memory with name: optuna


In [None]:
def objective(config):  
    device = torch.device('cuda' if torch.cuda.
                      is_available() else 'cpu')
    

    score = train_model_hydra(config) # Have training loop in here that outputs loss of model
    return {"val_loss": score}


# Can use fractions of GPU
tuner = tune.Tuner(tune.with_resources(tune.with_parameters(objective), resources={"cpu": 1, "gpu": 1/16}), param_space=config_space) 

results = tuner.fit()
print(results.get_best_result(metric="val_loss", mode="min").config)

0,1
Current time:,2024-03-28 12:37:00
Running for:,00:25:15.55
Memory:,69.7/125.9 GiB

Trial name,# failures,error file
objective_58137_00000,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00000_0_bidirectional=False,body_dropout=0.1000,body_hidden_size=64,body_learning_rate=0.0010,body_num_layer=1,bod_2024-03-28_12-11-46/error.txt"
objective_58137_00001,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00001_1_bidirectional=True,body_dropout=0.1000,body_hidden_size=64,body_learning_rate=0.0010,body_num_layer=1,body_2024-03-28_12-11-46/error.txt"
objective_58137_00002,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00002_2_bidirectional=False,body_dropout=0.4000,body_hidden_size=64,body_learning_rate=0.0010,body_num_layer=1,bod_2024-03-28_12-11-46/error.txt"
objective_58137_00003,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00003_3_bidirectional=True,body_dropout=0.4000,body_hidden_size=64,body_learning_rate=0.0010,body_num_layer=1,body_2024-03-28_12-11-46/error.txt"
objective_58137_00004,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00004_4_bidirectional=False,body_dropout=0.1000,body_hidden_size=128,body_learning_rate=0.0010,body_num_layer=1,bo_2024-03-28_12-11-46/error.txt"
objective_58137_00005,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00005_5_bidirectional=True,body_dropout=0.1000,body_hidden_size=128,body_learning_rate=0.0010,body_num_layer=1,bod_2024-03-28_12-11-46/error.txt"
objective_58137_00006,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00006_6_bidirectional=False,body_dropout=0.4000,body_hidden_size=128,body_learning_rate=0.0010,body_num_layer=1,bo_2024-03-28_12-11-46/error.txt"
objective_58137_00007,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00007_7_bidirectional=True,body_dropout=0.4000,body_hidden_size=128,body_learning_rate=0.0010,body_num_layer=1,bod_2024-03-28_12-11-46/error.txt"
objective_58137_00008,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00008_8_bidirectional=False,body_dropout=0.1000,body_hidden_size=256,body_learning_rate=0.0010,body_num_layer=1,bo_2024-03-28_12-11-46/error.txt"
objective_58137_00009,1,"/home/gbmc/ray_results/objective_2024-03-28_12-11-44/objective_58137_00009_9_bidirectional=True,body_dropout=0.1000,body_hidden_size=256,body_learning_rate=0.0010,body_num_layer=1,bod_2024-03-28_12-11-46/error.txt"

Trial name,status,loc,bidirectional,body_dropout,body_hidden_size,body_learning_rate,body_num_layer,body_output,general_head_learnin g_rate,head_dropout,head_hidden_size,head_learning_rate,head_num_layer,iter,total time (s),val_loss
objective_58137_00016,RUNNING,136.156.133.98:734830,False,0.1,128,1e-05,1,3,0.001,0.1,16,0.001,1,,,
objective_58137_00017,RUNNING,136.156.133.98:734836,True,0.1,128,1e-05,1,3,0.001,0.1,16,0.001,1,,,
objective_58137_00018,RUNNING,136.156.133.98:734921,False,0.4,128,1e-05,1,3,0.001,0.1,16,0.001,1,,,
objective_58137_00019,RUNNING,136.156.133.98:735042,True,0.4,128,1e-05,1,3,0.001,0.1,16,0.001,1,,,
objective_58137_00020,RUNNING,136.156.133.98:735191,False,0.1,256,1e-05,1,3,0.001,0.1,16,0.001,1,,,
objective_58137_00021,RUNNING,136.156.133.98:735192,True,0.1,256,1e-05,1,3,0.001,0.1,16,0.001,1,,,
objective_58137_00022,RUNNING,136.156.133.98:735358,False,0.4,256,1e-05,1,3,0.001,0.1,16,0.001,1,,,
objective_58137_00023,RUNNING,136.156.133.98:735450,True,0.4,256,1e-05,1,3,0.001,0.1,16,0.001,1,,,
objective_58137_00024,RUNNING,136.156.133.98:735550,False,0.1,64,0.001,3,3,0.001,0.1,16,0.001,1,,,
objective_58137_00025,RUNNING,136.156.133.98:735669,True,0.1,64,0.001,3,3,0.001,0.1,16,0.001,1,,,




[36m(objective pid=733452)[0m torch.Size([1, 90, 17])
[36m(objective pid=733452)[0m torch.Size([1, 90, 17])
[36m(objective pid=733452)[0m torch.Size([1, 90, 17])
[36m(objective pid=733452)[0m torch.Size([1, 90, 17])
[36m(objective pid=733452)[0m torch.Size([1, 90, 17])
[36m(objective pid=733452)[0m torch.Size([1, 90, 17])[32m [repeated 221x across cluster][0m
[36m(objective pid=733478)[0m torch.Size([1, 90, 17])[32m [repeated 427x across cluster][0m
[36m(objective pid=733450)[0m torch.Size([1, 90, 17])[32m [repeated 416x across cluster][0m
[36m(objective pid=733450)[0m torch.Size([1, 90, 17])[32m [repeated 429x across cluster][0m
[36m(objective pid=733453)[0m torch.Size([1, 90, 17])[32m [repeated 417x across cluster][0m
[36m(objective pid=733452)[0m torch.Size([1, 90, 17])[32m [repeated 418x across cluster][0m
[36m(objective pid=733450)[0m torch.Size([1, 90, 17])[32m [repeated 421x across cluster][0m
[36m(objective pid=733452)[0m torch.Size([1, 9

2024-03-28 12:30:29,182	ERROR tune_controller.py:1374 -- Trial task failed for trial objective_58137_00000
Traceback (most recent call last):
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/gbmc/miniforge3/envs/Hydra_Code/lib/python3.11/site-packages/ray/_private/worker.py", line 2624, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(NameError): [36mray::ImplicitFunc.train()[39m (pid=733450, ip=136.1



[36m(objective pid=733452)[0m torch.Size([1, 90, 17])[32m [repeated 440x across cluster][0m
[36m(objective pid=733453)[0m Epoch 1: Validation Mode
[36m(objective pid=733453)[0m general difference : 0.5563551228387015 
[36m(objective pid=733453)[0m specific difference: 1.5297278925350735
[36m(objective pid=733453)[0m Climatology loss: 20.68513767174312
[36m(objective pid=733452)[0m Epoch 1: Training Mode
[36m(objective pid=733452)[0m general difference : 0.9495864011837312 
[36m(objective pid=733452)[0m specific difference: 5.562232603720746
[36m(objective pid=733452)[0m Climatology loss: 27.39987973998007
[36m(objective pid=733633)[0m torch.Size([1, 90, 17])[32m [repeated 432x across cluster][0m




[36m(objective pid=733627)[0m Epoch 1: Validation Mode
[36m(objective pid=733478)[0m Epoch 1: Training Mode[32m [repeated 2x across cluster][0m
[36m(objective pid=733627)[0m general difference : 3.931296693427222 [32m [repeated 3x across cluster][0m
[36m(objective pid=733627)[0m specific difference: 1.1243873727321625[32m [repeated 3x across cluster][0m
[36m(objective pid=733627)[0m Climatology loss: 16.89325030514172[32m [repeated 3x across cluster][0m
[36m(objective pid=733452)[0m torch.Size([1, 90, 17])[32m [repeated 392x across cluster][0m
[36m(objective pid=733497)[0m Epoch 1: Validation Mode
[36m(objective pid=733497)[0m general difference : 2.2260935153279986 
[36m(objective pid=733497)[0m specific difference: 3.5930123935426983
[36m(objective pid=733497)[0m Climatology loss: 19.525672997065953




[36m(objective pid=733452)[0m torch.Size([1, 90, 17])[32m [repeated 343x across cluster][0m
[36m(objective pid=733452)[0m torch.Size([1, 90, 17])[32m [repeated 315x across cluster][0m




[36m(objective pid=733452)[0m torch.Size([1, 90, 17])[32m [repeated 283x across cluster][0m
[36m(objective pid=733452)[0m Epoch 1: Validation Mode
[36m(objective pid=733452)[0m general difference : 0.3642181307928903 
[36m(objective pid=733452)[0m specific difference: 1.125219568184444
[36m(objective pid=733452)[0m Climatology loss: 21.229541251318796
[36m(objective pid=733568)[0m Epoch 1: Training Mode
[36m(objective pid=733478)[0m torch.Size([1, 90, 17])[32m [repeated 260x across cluster][0m
[36m(objective pid=733519)[0m Epoch 1: Validation Mode
[36m(objective pid=733605)[0m general difference : 2.8810963228765485 [32m [repeated 3x across cluster][0m
[36m(objective pid=733605)[0m specific difference: 7.238150995934661[32m [repeated 3x across cluster][0m
[36m(objective pid=733605)[0m Climatology loss: 28.32948226861922[32m [repeated 3x across cluster][0m
[36m(objective pid=733478)[0m Epoch 1: Validation Mode
[36m(objective pid=733605)[0m Epoch 1: Tr



[36m(objective pid=733598)[0m torch.Size([1, 90, 17])[32m [repeated 227x across cluster][0m
[36m(objective pid=733478)[0m general difference : -0.022922114644731793 
[36m(objective pid=733478)[0m specific difference: 0.8541948359353202
[36m(objective pid=733478)[0m Climatology loss: 20.90152809074947
[36m(objective pid=733502)[0m Epoch 1: Training Mode
[36m(objective pid=733502)[0m general difference : 0.9972079903628158 
[36m(objective pid=733502)[0m specific difference: 5.013185891399478
[36m(objective pid=733502)[0m Climatology loss: 28.018829904793765
[36m(objective pid=733568)[0m torch.Size([1, 90, 17])[32m [repeated 219x across cluster][0m




[36m(objective pid=733568)[0m torch.Size([1, 90, 17])[32m [repeated 217x across cluster][0m




[36m(objective pid=733598)[0m torch.Size([1, 90, 17])[32m [repeated 186x across cluster][0m
[36m(objective pid=733568)[0m Epoch 1: Validation Mode
[36m(objective pid=733568)[0m general difference : -0.1828170713356563 
[36m(objective pid=733568)[0m specific difference: 0.4477879512310028
[36m(objective pid=733568)[0m Climatology loss: 21.244522050959723
[36m(objective pid=733598)[0m torch.Size([1, 90, 17])[32m [repeated 176x across cluster][0m
[36m(objective pid=733605)[0m Epoch 1: Validation Mode
[36m(objective pid=733605)[0m general difference : 2.3960689985752106 
[36m(objective pid=733605)[0m specific difference: 1.574074103321348
[36m(objective pid=733605)[0m Climatology loss: 21.029518709523337
[36m(objective pid=733583)[0m torch.Size([1, 90, 17])[32m [repeated 145x across cluster][0m
[36m(objective pid=733598)[0m torch.Size([1, 90, 17])[32m [repeated 132x across cluster][0m
[36m(objective pid=733502)[0m Epoch 1: Validation Mode
[36m(objective p



[36m(objective pid=733583)[0m torch.Size([1, 90, 17])[32m [repeated 117x across cluster][0m




[36m(objective pid=733598)[0m torch.Size([1, 90, 17])[32m [repeated 103x across cluster][0m
[36m(objective pid=733633)[0m torch.Size([1, 90, 17])[32m [repeated 95x across cluster][0m




[36m(objective pid=733598)[0m Epoch 1: Training Mode
[36m(objective pid=733598)[0m general difference : 1.4783406031642596 
[36m(objective pid=733598)[0m specific difference: 5.416695871662903
[36m(objective pid=733598)[0m Climatology loss: 29.378718711042236
[36m(objective pid=733633)[0m torch.Size([1, 90, 17])[32m [repeated 96x across cluster][0m




[36m(objective pid=733583)[0m torch.Size([1, 90, 17])[32m [repeated 98x across cluster][0m
[36m(objective pid=733583)[0m Epoch 1: Training Mode
[36m(objective pid=733583)[0m general difference : 2.324753636757615 
[36m(objective pid=733583)[0m specific difference: 7.149964876214437
[36m(objective pid=733583)[0m Climatology loss: 28.34958413257184
[36m(objective pid=733583)[0m torch.Size([1, 90, 17])[32m [repeated 98x across cluster][0m
[36m(objective pid=733633)[0m Epoch 1: Training Mode
[36m(objective pid=733633)[0m general difference : 2.003417256650438 
[36m(objective pid=733633)[0m specific difference: 8.579756757258936
[36m(objective pid=733633)[0m Climatology loss: 27.47745031701012
[36m(objective pid=733598)[0m torch.Size([1, 90, 17])[32m [repeated 101x across cluster][0m
[36m(objective pid=733583)[0m torch.Size([1, 90, 17])[32m [repeated 104x across cluster][0m
[36m(objective pid=733583)[0m torch.Size([1, 90, 17])[32m [repeated 102x across clu



[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 25x across cluster][0m




[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 23x across cluster][0m




[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 22x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 21x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 21x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 20x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 20x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 20x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 19x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 20x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 21x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 22x across cluster][0m
[36m(objective pid=733615)[0m torch.Size([1, 90,



[36m(objective pid=734921)[0m Epoch 1: Training Mode
[36m(objective pid=733615)[0m torch.Size([1, 90, 17])[32m [repeated 5x across cluster][0m
[36m(objective pid=734921)[0m general difference : 2.516323894104119 
[36m(objective pid=734921)[0m specific difference: 8.470799910638661
[36m(objective pid=734921)[0m Climatology loss: 28.094146323957972
[36m(objective pid=734830)[0m general difference : 2.7563777983589905 
[36m(objective pid=734830)[0m specific difference: 10.816608653082124
[36m(objective pid=734830)[0m Climatology loss: 31.01154181356568


{'body_hidden_size': 64, 'body_num_layer': 1, 'body_dropout': 0.4, 'bidirectional': True, 'body_output': 3, 'body_learning_rate': 0.001, 'head_hidden_size': 16, 'head_num_layer': 1, 'head_dropout': 0.1, 'head_learning_rate': 0.001, 'general_head_learning_rate': 0.001}
