# Read inputs from dss files and collate into input and output csv files

To prepare ANN data sets for training, this notebook reads from the given DSS files and builds the input and output CSV files.

## ANN Inputs
1.	Sac Flow
 * /CALSIM-SMOOTH/C_SAC048/FLOW/01JAN1922/1DAY/L2020A_DCP_EX/
 * /CALSIM/C_CSL004A/CHANNEL/01JAN1920/1MON/L2020A/
 * /CALSIM/C_CLV004/FLOW/01JAN1920/1MON/L2020A_DCP_EX/
 * /CALSIM/C_MOK019/CHANNEL/01JAN1920/1MON/L2020A/

2.	Exports
 * /CALSIM/C_CAA003_TD/FLOW/01JAN1920/1MON/L2020A_DCP_EX/
 * /CALSIM/C_DMC000_TD/FLOW/01JAN1920/1MON/L2020A_DCP_EX/
 * /CALSIM/D408/FLOW/01JAN1920/1MON/L2020A_DCP_EX/
 * /CALSIM/D_SJR028_WTPDWS/FLOW/01JAN1920/1MON/L2020A_DCP_EX/

3.	DCC
 * /CALSIM/DXC/GATE-DAYS-OPEN/01JAN2010/1MON/L2020A/

4.	Net_DCD
 * /CALSIM/NET_DICU/DICU_FLOW/01JAN1920/1MON/L2020A/

5.	SJR flow
 * /CALSIM-SMOOTH/C_SJR070/FLOW/01JAN1935/1DAY/L2020A_DCP_EX/

6.	Tide
 * /DWR/SAN_FRANCISCO/STAGE-MAX-MIN/01JAN1919/1DAY/ASTRO_NAVD_20170607/

7.	SMSCG
 * /MONTEZUMA/SMSCG/GATE-OPERATE/01JAN1922/1DAY/DCP_EX/


## ANN Outputs

Read every EC time series listed below and write them to a CSV file.
```
CVP_INTAKE :: //CVP_INTAKE/EC////
MIDR_INTAKE :: //MIDR_INTAKE/EC////
OLDR_CCF :: //OLDR_CCF/EC////
 * ROLD014 :: //ROLD014/EC////
ROLD024 :: //ROLD024/EC////
RSAC081 :: //RSAC081/EC////
RSAC092 :: //RSAC092/EC////
RSAN007 :: //RSAN007/EC////
RSAN018 :: //RSAN018/EC////
SLMZU003 :: //SLMZU003/EC////
SLMZU011 :: //SLMZU011/EC////
VICT_INTAKE :: //VICT_INTAKE/EC////
```

* means missing

In [1]:
%load_ext autoreload.
%autoreload 2


^C


In [None]:
# The following command can be used to install pyhecdss
#!pip install git+https://github.com/CADWRDeltaModeling/pyhecdss.git

In [1]:
import pyhecdss
import annutils
import dssioutils
import dssioutils_new
import dssioutils_cs3_base
import dssioutils_cs3_base_ns
import dssioutils_cs3_6k_tunnel
import pandas as pd
import numpy as np

In [2]:
import hvplot.pandas
import holoviews as hv
import panel as pn

# Display sample contents of a DSS File

In [3]:
import os
# Directory that holds the DSS files; every DSS path used in this notebook is
# built with os.path.join(path_data, <file name>).
path_data = '.'

In [None]:
def _catalog_table(dss_name):
    """Return the catalog of one DSS file in ``path_data`` as an hvplot table.

    The file is opened in a ``with`` block so the DSS handle is released as
    soon as the catalog has been read.
    """
    with pyhecdss.DSSFile(os.path.join(path_data, dss_name)) as dss_file:
        return dss_file.read_catalog().hvplot.table()


# By default a notebook cell renders only the value of its last expression,
# so three separate expressions would show just the final table. Stacking the
# tables in one Panel column displays all of them.
pn.Column(*[
    _catalog_table(dss_name)
    for dss_name in ('base.dss', 'base_ns.dss', '6k_tunnel.dss')
])

In [4]:
#pyhecdss.DSSFile(os.path.join(path_data, 'CS3v27_ex_combined.dss')).read_catalog().hvplot.table()
#pyhecdss.DSSFile(os.path.join(path_data, 'SMSCG_operation_OFF.dss')).read_catalog().hvplot.table()

# Read inputs and outputs

In [None]:
# Collate the ANN inputs of each DSS file with its matching dssioutils
# variant, then save each result as a CSV file.
base_dss = os.path.join(path_data, 'base.dss')
df_base = dssioutils_cs3_base.collate_calsim_inputs_for_ann(base_dss)
df_base.to_csv('in_base.csv')

base_ns_dss = os.path.join(path_data, 'base_ns.dss')
df_base_ns = dssioutils_cs3_base_ns.collate_calsim_inputs_for_ann(base_ns_dss)
df_base_ns.to_csv('in_base_ns.csv')

tunnel_6k_dss = os.path.join(path_data, '6k_tunnel.dss')
df_6k_tunnel = dssioutils_cs3_6k_tunnel.collate_calsim_inputs_for_ann(tunnel_6k_dss)
df_6k_tunnel.to_csv('in_6k_tunnel.csv')

In [5]:
#dfon_new = dssioutils_new.collate_calsim_inputs_for_ann(os.path.join(path_data, 'CS3v27_ex_combined.dss'))
#dfon_new.to_csv('CS3v27_ex_combined.csv')

#dfon_new = dssioutils_new.collate_calsim_inputs_for_ann(os.path.join(path_data, 'SMSCG_operation_OFF.dss'))
#dfon_new.to_csv('SMSCG_operation_OFF.csv')

#dfoff = dssioutils.collate_calsim_inputs_for_ann(os.path.join(path_data, 'SMSCG_operation_OFF.dss'))
#dfoff.to_csv('SMSCG_operation_OFF.csv')


In [None]:
# HZS: For any new data set whose DCC values are greater than 1, run the following code to convert them to daily 0/1 values.
import pandas as pd

def optimize_dcc_values(df, date_col='Unnamed: 0', dcc_col='dcc'):
    """Expand a per-month DCC count into daily 0/1 values.

    Rows are grouped by the calendar year and month of ``date_col``. Within
    each group the ``dcc_col`` value of the first row is read as a count
    ``k``; the first ``k`` rows of that group are set to 1 and the remaining
    rows to 0. "First" refers to row order inside the group, not to the
    day-of-month number.

    Apply this only to data that still holds the monthly count: the function
    always reads the first value of each month as the count, so running it
    again on already converted data can change the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding at least ``date_col`` and ``dcc_col``. It is modified
        in place: ``date_col`` is converted to datetime and ``dcc_col`` is
        overwritten.
    date_col : str, default 'Unnamed: 0'
        Name of the column holding the dates (the default is the name pandas
        gives an unnamed first CSV column).
    dcc_col : str, default 'dcc'
        Name of the column holding the DCC values.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    dates = pd.to_datetime(df[date_col])
    df[date_col] = dates

    # Group on the derived year/month series directly instead of adding
    # temporary 'Year'/'Month' columns, which would overwrite and then drop
    # any columns of those names already present in the caller's frame.
    by_month = df.groupby(
        [dates.dt.year.rename('year'), dates.dt.month.rename('month')]
    )

    for _, month_rows in by_month:
        open_days = month_rows[dcc_col].iloc[0]
        df.loc[month_rows.index, dcc_col] = [
            1 if day <= open_days else 0
            for day in range(1, len(month_rows) + 1)
        ]

    return df

# Convert the DCC column of the collated input CSV and save the result under
# a new file name, leaving the original CSV untouched.
file_path = 'inputs_CS3v27_ex_combined.csv'
output_path = 'updated_inputs_CS3v27_ex_combined.csv'

df = pd.read_csv(file_path)
updated_df = optimize_dcc_values(df)

# index=False: the dates are already a regular column of the frame.
updated_df.to_csv(output_path, index=False)

In [8]:
#import dssioutils_original

In [10]:
#dfon_new = dssioutils.collate_calsim_inputs_for_ann(os.path.join(path_data, 'CS3v27_ex_combined.dss'))
#dfon = dssioutils_original.collate_calsim_inputs_for_ann(os.path.join(path_data, 'SMSCG_operation_ON.dss'))

#dfon = dssioutils_original.collate_calsim_inputs_for_ann(os.path.join(path_data, 'SMSCG_operation_ON.dss'))

#dfoff = dssioutils_original.collate_calsim_inputs_for_ann(os.path.join(path_data, 'SMSCG_operation_OFF.dss'))

#dfon_new.to_csv('CS3v27_ex_combined.csv')
#dfon_new.to_csv('smscg_input_on.csv')
#dfoff.to_csv('smscg_input_off.csv')

In [None]:
# DSS B-parts (location names) whose EC time series are exported; each one is
# looked up with the pathname pattern //<bpart>/EC////.
bparts = [
    'CVP_INTAKE',
    'MIDR_INTAKE',
    'OLDR_CCF',
    'ROLD024',
    'RSAC054',
    'RSAC081',
    'RSAC092',
    'RSAN007',
    'RSAN018',
    'SLMZU003',
    'SLMZU011',
    'VICT_INTAKE',
]


def read_ec(dssfile, bpart):
    """Read the EC time series of one location from a DSS file.

    The series is looked up with the pathname pattern ``//<bpart>/EC////``.
    If the pattern yields several results, only the first one is used.

    Parameters
    ----------
    dssfile : str
        Path of the DSS file to read.
    bpart : str
        B-part (location name) of the EC pathname.

    Returns
    -------
    pandas.DataFrame
        Single-column frame named ``<bpart>_EC``. A period index is converted
        to timestamps; any other index type is returned as read.

    Raises
    ------
    IndexError
        If ``pyhecdss.get_ts`` yields no result for the pattern.
    """
    pathname = '//%s/EC////' % bpart

    # Materialise every result before picking the first, so the iterable from
    # get_ts is fully consumed.
    # NOTE(review): presumably this lets pyhecdss close the file - confirm.
    matches = list(pyhecdss.get_ts(dssfile, pathname))
    if not matches:
        # Same exception type as the bare [0] lookup raised, but with context.
        raise IndexError(
            'No time series matching %s found in %s' % (pathname, dssfile)
        )

    # The first field of a result is used as the data frame.
    df = matches[0][0]

    # Only a PeriodIndex has to_timestamp(); leave other index types as they are
    # instead of failing with AttributeError.
    if isinstance(df.index, pd.PeriodIndex):
        df.index = df.index.to_timestamp()

    df.columns = ['%s_EC' % bpart]
    return df


#dfon = pd.concat([read_ec(os.path.join(path_data, 'SMSCG_operation_ON.dss'), bpart) for bpart in bparts], axis=1)
#dfon.to_csv('smscg_output_on.csv')
#dfoff = pd.concat([read_ec(os.path.join(path_data, 'SMSCG_operation_OFF.dss'), bpart) for bpart in bparts], axis=1)
#dfoff.to_csv('smscg_output_off.csv')

def _collect_ec(dss_name):
    """Read the EC series of every location in ``bparts`` from one DSS file.

    The single-column frames returned by ``read_ec`` are joined side by side
    into one frame.
    """
    dss_path = os.path.join(path_data, dss_name)
    return pd.concat([read_ec(dss_path, bpart) for bpart in bparts], axis=1)


# Write one EC output CSV per DSS file.
df_ex = _collect_ec('base.dss')
df_ex.to_csv('out_base.csv')

df_ex_ns = _collect_ec('base_ns.dss')
df_ex_ns.to_csv('out_base_ns.csv')

df_pa6k = _collect_ec('6k_tunnel.dss')
df_pa6k.to_csv('out_6k_tunnel.csv')