# Notebook to explore inherent model variance 

# Step 0 - Prepare Notebook

In [1]:
import os
import time 
import json
import pandas as pd
import warnings
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display
from snowML.datapipe.utils import data_utils as du
from snowML.LSTM import set_hyperparams as sh
from snowML.LSTM import LSTM_pre_process as pp 
from snowML.LSTM import LSTM_train as LSTM_tr
from snowML.LSTM import LSTM_metrics as met
from snowML.LSTM import LSTM_plot3 as plot3
from snowML.Scripts import local_training_mixed_loss as ml


# Step 1 - Define HyperParams and Test Huc

In [2]:
huc = '170200090101'  # test HUC used throughout this notebook: Chelan, maritime

In [3]:
# Hyperparameters: begin with the project's baseline dictionary, then
# override only the entries this experiment cares about.
params = sh.create_hyper_dict()
params.update(
    {
        "learning_rate": .001,
        "n_epochs": 10,
        "batch_size": 32,
        "var_list": ['mean_pr', 'mean_tair'],
        "expirement_name": "MultipleRunsSameHuc",  # key spelling matches the baseline dict
        "loss_type": "mse",
        "train_size_dimension": "time",
        "train_size_fraction": .67,
        "recursive_predict": False,
        "UCLA": False,  # start with UA data, then update
    }
)
params

{'hidden_size': 64,
 'num_class': 1,
 'num_layers': 1,
 'dropout': 0.5,
 'learning_rate': 0.001,
 'n_epochs': 10,
 'lookback': 180,
 'batch_size': 32,
 'n_steps': 1,
 'num_workers': 8,
 'var_list': ['mean_pr', 'mean_tair'],
 'expirement_name': 'MultipleRunsSameHuc',
 'loss_type': 'mse',
 'mse_lambda_start': 1,
 'mse_lambda_end': 0.5,
 'train_size_dimension': 'time',
 'train_size_fraction': 0.67,
 'mlflow_tracking_uri': 'arn:aws:sagemaker:us-west-2:677276086662:mlflow-tracking-server/dawgsML',
 'recursive_predict': False,
 'lag_days': 30,
 'lag_swe_var_idx': 3,
 'filter_dates': ['1984-10-01', '2021-09-30'],
 'custom delta': 0.04,
 'UCLA': False,
 'Stop_Loss': False,
 'KGE_target': 0.9,
 'MLFLOW_ON': True}

# Step 2 - Define model functions 

In [18]:
def pre_process(huc, params):
    """Pre-process the data for a single HUC and extract its training split.

    Parameters:
        huc (str): Identifier of the HUC to process.
        params (dict): Hyperparameter dictionary; the keys "var_list", "UCLA",
            "filter_dates" and "train_size_fraction" are read.

    Returns:
        tuple: (df_dict, df_train), where df_dict is whatever
            pp.pre_process_separate returns for [huc] and df_train is the first
            of the four values returned by pp.train_test_split_time.
    """
    # Normalization and date filtering happen inside the pre-processing helper.
    df_dict = pp.pre_process_separate(
        [huc],
        params["var_list"],
        UCLA=params["UCLA"],
        filter_dates=params["filter_dates"],
    )
    train_fraction = params["train_size_fraction"]
    huc_df = df_dict[huc]
    # Only the training portion of the time-based split is needed by callers.
    split = pp.train_test_split_time(huc_df, train_fraction)
    df_train, _, _, _ = split
    return df_dict, df_train

def train_model(df_train, params):
    """Initialize a fresh model and train it for params["n_epochs"] epochs.

    Parameters:
        df_train: Training data handed unchanged to LSTM_tr.fine_tune.
        params (dict): Hyperparameter dictionary; "n_epochs" is read here and
            the whole dict is forwarded to the initialization and training helpers.

    Returns:
        The model object produced by ml.initialize_model after all epochs have run.
    """
    model, optimizer, loss_fn = ml.initialize_model(params)
    n_epochs = params["n_epochs"]

    # For local training, fine_tune is called rather than pre_train.
    for epoch_idx in range(n_epochs):
        print(f"Training in epoch {epoch_idx}")
        LSTM_tr.fine_tune(model, optimizer, loss_fn, df_train, params, epoch_idx)

    return model

def evaluate(model_dawgs_trained, df_dict, huc, params):
    """Run predictions for one HUC and compute test-set metrics.

    Parameters:
        model_dawgs_trained: Trained model forwarded to LSTM_tr.predict_prep.
        df_dict: Pre-processed data mapping forwarded to LSTM_tr.predict_prep.
        huc (str): Identifier of the HUC to evaluate.
        params (dict): Hyperparameter dictionary; "UCLA" selects the metric-name
            suffix ("UCLA" when truthy, otherwise "UA").

    Returns:
        tuple: (metrics, data, y_tr_pred, y_te_pred, y_tr_true, y_te_true,
            y_te_pred_recur, train_size). metrics holds the "test_<suffix>"
            metrics and, when recursive predictions are available, the
            "test_recur_<suffix>" metrics as well.
    """
    suffix = "UCLA" if params["UCLA"] else "UA"

    prediction_outputs = LSTM_tr.predict_prep(model_dawgs_trained, df_dict, huc, params)
    (data, y_tr_pred, y_te_pred, y_tr_true,
     y_te_true, y_te_pred_recur, train_size) = prediction_outputs

    metrics = met.calc_metrics(y_te_true, y_te_pred, metric_type=f"test_{suffix}")
    if y_te_pred_recur is not None:
        # Recursive predictions exist: merge their metrics into a new dict.
        recur_metrics = met.calc_metrics(
            y_te_true, y_te_pred_recur, metric_type=f"test_recur_{suffix}"
        )
        metrics = {**metrics, **recur_metrics}

    return (metrics, data, y_tr_pred, y_te_pred,
            y_tr_true, y_te_true, y_te_pred_recur, train_size)


def combine_results(data, y_tr_pred, y_te_pred, y_tr_true, y_te_true, y_te_pred_recur, train_size, params):
    """Assemble predictions and targets into one frame aligned with `data`.

    Parameters:
        data (pd.DataFrame): Frame containing a "mean_swe" column; only that
            column is copied into the result.
        y_tr_pred, y_tr_true: Training predictions / targets. They are placed at
            the start of their columns and padded with NaN at the end.
        y_te_pred, y_te_true: Test predictions / targets. They are placed after
            train_size + params["lookback"] leading NaN values.
        y_te_pred_recur: Accepted for signature compatibility; not used here.
        train_size (int): Number of training rows.
        params (dict): Hyperparameter dictionary; "lookback" is read.

    Returns:
        pd.DataFrame: Copy of data[["mean_swe"]] with the added columns
            "y_tr_pred", "y_te_pred", "y_tr_true" and "y_te_true".
    """
    results_df = data[["mean_swe"]].copy()

    def _pad_after(values):
        # Values first, then NaN up to the length of the frame.
        return list(values) + [float('nan')] * (len(results_df) - len(values))

    def _pad_before(values):
        # NaN for the training rows plus the lookback window, then the values.
        return [float('nan')] * (train_size + params["lookback"]) + list(values)

    column_plan = (
        ("y_tr_pred", y_tr_pred, _pad_after),
        ("y_te_pred", y_te_pred, _pad_before),
        ("y_tr_true", y_tr_true, _pad_after),
        ("y_te_true", y_te_true, _pad_before),
    )
    for column_name, values, pad in column_plan:
        results_df[column_name] = pad(values)
    return results_df

def label_results(df, suffix):
    """Return `df` with "_<suffix>" appended to every column label.

    Parameters:
        df (pd.DataFrame): Frame whose columns should be relabelled.
        suffix: Label to append (e.g. "UA" or "UCLA").

    Returns:
        pd.DataFrame: The result of df.add_suffix with the underscore-prefixed suffix.
    """
    return df.add_suffix("_{}".format(suffix))

def run_one(huc, params, data_type = "UA"):
    """Pre-process, train and evaluate one model for a single HUC.

    Parameters:
        huc (str): Identifier of the HUC to train and evaluate on.
        params (dict): Hyperparameter dictionary. It is NOT modified: the
            "UCLA" flag implied by `data_type` is set on a private shallow copy,
            so repeated runs and later cells keep seeing the caller's settings.
        data_type (str): "UCLA" to run with params["UCLA"] = True; any other
            value (default "UA") runs with params["UCLA"] = False.

    Returns:
        dict: The metric dictionary produced by `evaluate` (its first return value).
    """
    # Shallow copy is enough: only the top-level "UCLA" entry is overridden here.
    run_params = dict(params)
    run_params["UCLA"] = data_type == "UCLA"

    df_dict, df_train = pre_process(huc, run_params)
    with warnings.catch_warnings():
        # Ignore the warning about the dropout param being irrelevant with a single layer.
        warnings.simplefilter("ignore", UserWarning)
        model_dawgs_trained = train_model(df_train, run_params)

    # evaluate() also returns the data and prediction arrays; only the metrics are needed.
    combined_dict = evaluate(model_dawgs_trained, df_dict, huc, run_params)[0]
    return combined_dict

def dict_to_single_row_df(data_dict):
    """
    Build a one-row pandas DataFrame from a dictionary.

    Parameters:
        data_dict (dict): Mapping of column name -> value for the single row.

    Returns:
        pd.DataFrame: A frame with exactly one row whose columns are the
        dictionary's keys.
    """
    single_row = [data_dict]
    return pd.DataFrame(single_row)






# Step 3 - Get Results for Sample HUC

In [5]:
# NOTE(review): repeats the `huc` assignment already made in Step 1 (same value).
huc = '170200090101'

In [19]:
# One full pre-process / train / evaluate pass on the sample HUC,
# using run_one's default data_type ("UA").
results = run_one(huc, params)

Training in epoch 0
Training in epoch 1
Training in epoch 2
Training in epoch 3
Training in epoch 4
Training in epoch 5
Training in epoch 6
Training in epoch 7
Training in epoch 8
Training in epoch 9


In [20]:
# Display the metric dictionary returned by run_one for this single run.
results

{'test_UA_mse': 0.009720983,
 'test_UA_kge': 0.9385536544028658,
 'test_UA_r2': 0.9444973777293773,
 'test_UA_mae': 0.06340153}

# Step 4 - Repeat the Experiment Using the `multi_run_single_huc` Module

In [24]:
from snowML.Scripts import multi_run_single_huc as mrsh
import importlib

In [29]:
# Re-import the module so edits made to its source are picked up without restarting the kernel.
importlib.reload(mrsh)

<module 'snowML.Scripts.multi_run_single_huc' from '/home/suetboyd/Capstone/SnowML/src/snowML/Scripts/multi_run_single_huc.py'>

In [30]:
# Run the module's experiment on the same HUC and hyperparameters.
# NOTE(review): judging by the output below, this logs per-epoch test/train metrics
# and returns a (float, metrics dict) tuple — confirm against multi_run_single_huc.
# The return value is only displayed, not stored.
mrsh.run_multi_exp(huc, params)



Epoch 0
evaluating on huc 170200090101
test_mse: 0.04752444848418236
test_kge: 0.846424696822091
test_r2: 0.728655864644969
test_mae: 0.15049424767494202
train_mse: 0.057158514857292175
train_kge: 0.8164002698266688
train_r2: 0.6847067845297408
train_mae: 0.1590631902217865
Epoch 1
evaluating on huc 170200090101
test_mse: 0.026343023404479027
test_kge: 0.9238446355859425
test_r2: 0.8495926910831191
test_mae: 0.10974498838186264
train_mse: 0.03684147819876671
train_kge: 0.895491638441111
train_r2: 0.7967779667238732
train_mae: 0.12370743602514267
Epoch 2
evaluating on huc 170200090101
test_mse: 0.020172692835330963
test_kge: 0.9077584150721032
test_r2: 0.884822614828817
test_mae: 0.09553276747465134
train_mse: 0.023958703503012657
train_kge: 0.8836893011589392
train_r2: 0.8678409154877746
train_mae: 0.10758419334888458
Epoch 3
evaluating on huc 170200090101
test_mse: 0.015524337999522686
test_kge: 0.9412891702763703
test_r2: 0.9113627164450626
test_mae: 0.0756797045469284
train_mse: 0.0

(0.8551491495769864,
 {'test_mse': 0.017223932,
  'test_kge': 0.9092803592121643,
  'test_r2': 0.901658775889191,
  'test_mae': 0.08032221})