# Setup


In [18]:
import os
import pickle
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import r2_score
from tensorflow import keras
from tensorflow.keras import layers
from tqdm.auto import tqdm, trange

tqdm.pandas()

from collections import defaultdict

local_root_path = "."

sys.path.append(local_root_path)
import annutils

import joblib

# excel_files

In [26]:

dsp_home = r"F:\projects\ann_dsp"
# find all the .xlsx files in the local_root_path dir
# excel_files = [f for f in os.listdir(local_root_path) if f.endswith(".xlsx") and not f.startswith("~$")]
# excel_files = [r"D:\projects\delta_salinity\model\dsm2\DSP_DSM2_202307\modified_bc\anninputs\perturbhist\dsm2_ann_inputs_perturbhist.xlsx"]
excel_files = [rf"{dsp_home}\model\dsm2\DSP_DSM2_202307\historical\anninputs\dsm2_ann_inputs_historical.xlsx"]
excel_files = [rf"{dsp_home}\model\dsm2\DSP_DSM2_202307\latinhypercube_7\anninputs\lathypcub_0\dsm2_ann_inputs_lathypcub_0.xlsx"]
excel_files = [rf"{dsp_home}\model\dsm2\DSP_DSM2_202307\historical\anninputs\dsm2_ann_inputs_historical.xlsx"]

#excel_files = [rf"{dsp_home}\model\dsm2\DSP_DSM2_202307\latinhypercube_7\anninputs\lathypcub_{exp}\dsm2_ann_inputs_lathypcub_{exp}.xlsx" for exp in [2]]
#2 3 5 1



# Setup stations
In order to turn the excel sheets into inputs we can predict on we need to setup

In [27]:
num_sheets = 9

observed_stations_ordered_by_median = ['RSMKL008', 'RSAN032', 'RSAN037', 'RSAC092', 'SLTRM004', 'ROLD024',
                                       'CHVCT000', 'RSAN018', 'CHSWP003', 'CHDMC006', 'SLDUT007', 'RSAN072',
                                       'OLD_MID', 'RSAN058', 'ROLD059', 'RSAN007', 'RSAC081', 'SLMZU025',
                                       'RSAC075', 'SLMZU011', 'SLSUS012', 'SLCBN002', 'RSAC064']

output_stations = ['CHDMC006-CVP INTAKE', 'CHSWP003-CCFB_INTAKE', 'CHVCT000-VICTORIA INTAKE',
                   'OLD_MID-OLD RIVER NEAR MIDDLE RIVER', 'ROLD024-OLD RIVER AT BACON ISLAND',
                   'ROLD059-OLD RIVER AT TRACY BLVD', 'RSAC064-SACRAMENTO R AT PORT CHICAGO',
                   'RSAC075-MALLARDISLAND', 'RSAC081-COLLINSVILLE', 'RSAC092-EMMATON',
                   'RSAC101-SACRAMENTO R AT RIO VISTA', 'RSAN007-ANTIOCH', 'RSAN018-JERSEYPOINT',
                   'RSAN032-SACRAMENTO R AT SAN ANDREAS LANDING', 'RSAN037-SAN JOAQUIN R AT PRISONERS POINT',
                   'RSAN058-ROUGH AND READY ISLAND', 'RSAN072-SAN JOAQUIN R AT BRANDT BRIDGE',
                   'RSMKL008-S FORK MOKELUMNE AT TERMINOUS', 'SLCBN002-CHADBOURNE SLOUGH NR SUNRISE DUCK CLUB',
                   'SLDUT007-DUTCH SLOUGH', 'SLMZU011-MONTEZUMA SL AT BELDONS LANDING',
                   'SLMZU025-MONTEZUMA SL AT NATIONAL STEEL', 'SLSUS012-SUISUN SL NEAR VOLANTI SL',
                   'SLTRM004-THREE MILE SLOUGH NR SAN JOAQUIN R', 'SSS-STEAMBOAT SL', 'CCW-MIDDLE RIVER INTAKE',
                   'OH4-OLD R @ HWY 4', 'SLRCK005-CCWD_Rock', 'MRU-MIDDLE RIVER AT UNDINE ROAD', 'HLL-HOLLAND TRACT',
                   'BET-PIPER SLOUGH @ BETHEL TRACT', 'GES-SACRAMENTO R BELOW GEORGIANA SLOUGH',
                   'NMR: N FORK MOKELUMNE R NEAR WALNUT GROVE', 'IBS-CORDELIA SLOUGH @ IBIS CLUB',
                   'GYS-GOODYEAR SLOUGH AT MORROW ISLAND CLUB', 'BKS-SLBAR002-North Bay Aqueduct/Barker Sl']

output_stations, name_mapping = annutils.read_output_stations(output_stations, observed_stations_ordered_by_median)

# mse_loss_masked def

In [28]:
def mse_loss_masked(y_true, y_pred):
    squared_diff = tf.reduce_sum(tf.math.squared_difference(y_pred[y_true > 0], y_true[y_true > 0]))
    return squared_diff / (tf.reduce_sum(tf.cast(y_true > 0, tf.float32)) + 0.01)

# predict definitions

In [29]:
def predict(location, df_input, output_columns):
    model=keras.models.load_model('%s.h5'%location,custom_objects={"mse_loss_masked": mse_loss_masked})
    xscaler,yscaler=joblib.load('%s_xyscaler.dump'%location)
    return predict_with_model(model, xscaler, yscaler, df_input, output_columns)

def predict_with_model(model, xscaler, yscaler, df_input, output_columns):
    dfx = pd.DataFrame(xscaler.transform(df_input), df_input.index, columns=df_input.columns)

    yyp=model.predict(dfx, verbose=True)
    predicted_y = yscaler.inverse_transform(yyp)
    return pd.DataFrame(predicted_y, index=df_input.index, columns=output_columns)

# Run predictions

In [36]:
experiments = [ "6yearsaugmented"] #["latinhypercube_7"]  #["lathypcub_regtide"] #"colab","6years", "4years","4years_DCC","4years_SacLag","4years_SacMag","colab_wo2015"

experiments = ["4years_cal"]
experiments = ["latinhypercube_7","colab_reduced","4years_cal","6yearsaugmented"]
#experiments = ["colab_reduced","latinhypercube_7"]

for experiment in tqdm(experiments):
    print("Experiment: %s" % experiment)
    experiment_dir = os.path.join(local_root_path, "Experiments", experiment)

    ndays = 118
    window_size = 0
    nwindows = 0

    compression_opts = dict(method='zip', archive_name='out.csv')

    model_dir = os.path.join(experiment_dir, "models")
    model_files = [f for f in os.listdir(model_dir) if f.endswith(".h5")]

    print("Local root: ",local_root_path)
    for data_file in tqdm(excel_files):
        print("Data file: %s" % data_file)
        data_path = os.path.join(local_root_path,data_file)
        dfinps, dfouts = annutils.read_and_split(data_path, num_sheets, observed_stations_ordered_by_median)
        for cn in dfinps.columns:
            print("Col "+cn)

        dfinps = annutils.create_antecedent_inputs(dfinps,ndays=ndays,window_size=window_size,nwindows=nwindows)
        dfinps, dfouts = annutils.synchronize(dfinps, dfouts)


        #get the name of the file without the extension
        # file_name = os.path.splitext(data_file)[0]
        file_name = os.path.splitext(os.path.basename(data_file))[0]

        dirs = ["input", "target", "prediction"]
        for dir in dirs:
            os.makedirs(os.path.join("Experiments", experiment, "results", dir), exist_ok=True)

        input_file = os.path.join("Experiments", experiment, "results", "input", file_name + ".csv")
        dfinps.to_csv(input_file, compression=compression_opts)

        # read_in = pd.read_csv(input_file, compression=compression_opts, index_col=0)

        target_file = os.path.join("Experiments", experiment, "results", "target", file_name + "_target.csv")
        dfouts.to_csv(target_file, compression=compression_opts)

        for model_file in tqdm(model_files):
            print("Model file: %s" % model_file)
            model_name = os.path.splitext(model_file)[0]

            model_prediction_dir = os.path.join(experiment_dir, "results", "prediction", model_name)
            os.makedirs(model_prediction_dir, exist_ok=True)

            location = os.path.join(model_dir, model_name)
            print("Location: %s" % location)
            #prediction = predict(location, dfinps, dfouts.columns)
            model=keras.models.load_model('%s.h5'%location,custom_objects={"mse_loss_masked": mse_loss_masked})
            xscaler,yscaler=joblib.load('%s_xyscaler.dump'%location)
            print("Xscaler Min[0]: %s" % xscaler.min_val[0])
            print("Xscaler Max[0]: %s" % xscaler.max_val[0])
            scaled_input = xscaler.transform(dfinps)
            print("scaled input")
            print(scaled_input.iloc[0])
            #print(scaled_input.iloc[1])
            #print(scaled_input)
            dfx = pd.DataFrame(scaled_input, dfinps.index, columns=dfinps.columns)

            yyp=model.predict(dfx, verbose=True)
            predicted_y = yscaler.inverse_transform(yyp)
            prediction = pd.DataFrame(predicted_y, index=dfinps.index, columns=dfouts.columns)
            print("prediction")
            print(prediction)
            prediction_file = os.path.join(model_prediction_dir, file_name + ".csv")
            print(f"Writing to {prediction_file}")
            prediction.to_csv(prediction_file, compression=compression_opts)

print("Done!")

  0%|          | 0/2 [00:00<?, ?it/s]

Experiment: colab_reduced
Local root:  .




Data file: F:\projects\ann_dsp\model\dsm2\DSP_DSM2_202307\historical\anninputs\dsm2_ann_inputs_historical.xlsx
Col northern_flow
Col sjr_flow
Col exports
Col dcc_op
Col div+seep-drain_dcd+smcd
Col daily_max-min
Col sjr_vernalis_ec
Col sac_greens_ec



[A

Model file: mtl_i118_lstm14_lstm14_f_o1.h5
Location: .\Experiments\colab_reduced\models\mtl_i118_lstm14_lstm14_f_o1
Xscaler Min[0]: 3519.98323631286
Xscaler Max[0]: 100000.0
scaled input
/HIST+GATE/RSAC128/POS/01JAN1953 - 01JAN2020/IR-YEAR/DWR-DMS-DSM2/_lag1           NaN
/HIST+GATE/RSAC128/POS/01JAN1953 - 01JAN2020/IR-YEAR/DWR-DMS-DSM2/_lag10          NaN
/HIST+GATE/RSAC128/POS/01JAN1953 - 01JAN2020/IR-YEAR/DWR-DMS-DSM2/_lag100         NaN
/HIST+GATE/RSAC128/POS/01JAN1953 - 01JAN2020/IR-YEAR/DWR-DMS-DSM2/_lag101         NaN
/HIST+GATE/RSAC128/POS/01JAN1953 - 01JAN2020/IR-YEAR/DWR-DMS-DSM2/_lag102         NaN
                                                                               ...   
sjr_vernalis_ec_lag95                                                        0.681067
sjr_vernalis_ec_lag96                                                        0.644854
sjr_vernalis_ec_lag97                                                        0.672808
sjr_vernalis_ec_lag98                  


100%|██████████| 1/1 [00:11<00:00, 11.06s/it]
100%|██████████| 1/1 [00:30<00:00, 30.31s/it]
 50%|█████     | 1/2 [00:30<00:30, 30.31s/it]

prediction
            CHDMC006  CHSWP003  CHVCT000  OLD_MID  ROLD024  ROLD059  RSAC064  \
1990-04-28       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
1990-04-29       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
1990-04-30       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
1990-05-01       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
1990-05-02       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
...              ...       ...       ...      ...      ...      ...      ...   
2021-12-26       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
2021-12-27       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
2021-12-28       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
2021-12-29       NaN       NaN       NaN      NaN      NaN      NaN      NaN   
2021-12-30       NaN       NaN       NaN      NaN      NaN      NaN      NaN   

            RSAC075  RSAC081



Data file: F:\projects\ann_dsp\model\dsm2\DSP_DSM2_202307\historical\anninputs\dsm2_ann_inputs_historical.xlsx
Col northern_flow
Col sjr_flow
Col exports
Col dcc_op
Col div+seep-drain_dcd+smcd
Col daily_max-min
Col sjr_vernalis_ec
Col sac_greens_ec



[A

Model file: mtl_i118_lstm14_lstm14_f_o1.h5
Location: .\Experiments\latinhypercube_7\models\mtl_i118_lstm14_lstm14_f_o1
Xscaler Min[0]: 4594.38
Xscaler Max[0]: 100000.0
scaled input
northern_flow_lag1                0.073975
sjr_flow_lag1                     0.040157
exports_lag1                      0.584227
dcc_op_lag1                       1.000000
div+seep-drain_dcd+smcd_lag1      0.893104
                                    ...   
dcc_op_lag118                     1.000000
div+seep-drain_dcd+smcd_lag118    0.849951
daily_max-min_lag118              0.450271
sjr_vernalis_ec_lag118            0.878216
sac_greens_ec_lag118              0.301835
Name: 1990-04-28 00:00:00, Length: 944, dtype: float64
prediction
              CHDMC006    CHSWP003    CHVCT000     OLD_MID     ROLD024  \
1990-04-28  739.006951  712.963004  506.881806  855.593815  875.096227   
1990-04-29  735.341551  710.293504  505.743326  847.540738  873.177414   
1990-04-30  738.916828  713.147235  508.027092  854.631203


100%|██████████| 1/1 [00:11<00:00, 11.20s/it]
100%|██████████| 1/1 [00:29<00:00, 29.48s/it]
100%|██████████| 2/2 [00:59<00:00, 29.90s/it]

Done!



