In [4]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyhecdss
from vtools.data.vtime import hours
from vtools.functions.filter import cosine_lanczos
import altair as alt
from tabulate import tabulate

pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', None)     # Show all rows
pd.set_option('display.width', 1000)        # Adjust the width to fit the DataFrame
pd.set_option('display.colheader_justify', 'left')  # Left-align column headers
pd.set_option('display.float_format', '{:.2f}'.format)

In [5]:
def load_dss(dss_file,run_list, C_part):
    '''
    Load dss files and output dataframe
    Parameters
    ----------
    dss_file : str
        filename and path of the dss file to load.
    run_list : LIST
        List of the station names wanted in the dataframe.
    C_part : str
        Name of the part C wanted, i.e. 'STAGE' or 'FLOW'.

    Returns
    -------
    temp : dataframe
        dataframe containing the time serie for the stations specified..

    '''
    
    temp = pd.DataFrame()
    # read dss catalog
    with pyhecdss.DSSFile(dss_file) as d:
         df=d.read_catalog()
    sites = df['B'].where(df['C']==C_part).dropna()
    common_sites = list(set(run_list).intersection(set(sites)))
    catdf = df[df['B'].isin(common_sites)] # filter by needed observation stations
    catdf.reset_index(drop=True, inplace=True)

    if C_part == 'STAGE':
        cat = catdf[catdf['C']=='STAGE']
        plists=d.get_pathnames(cat) # that's the list of ts paths
        for i, plist in enumerate(plists):  
            temp[i],units1,ptype1=d.read_rts(plist) # couldn't figure out how to do it without iterating 
            #print(i, plist)

    if C_part == 'FLOW':
        cat = catdf[catdf['C']=='FLOW']
        plists=d.get_pathnames(cat) # that's the list of ts paths
        for i, plist in enumerate(plists):  
            temp[i],units1,ptype1=d.read_rts(plist)
            
    if C_part == 'EC':
        cat = catdf[catdf['C']=='EC']
        plists=d.get_pathnames(cat) # that's the list of ts paths
        for i, plist in enumerate(plists):  
            temp[i],units1,ptype1=d.read_rts(plist) 
        #print(i, plist)
        
    if C_part == 'DEVICE-FLOW':
        cat = catdf[catdf['C']=='DEVICE-FLOW']
        plists=d.get_pathnames(cat) # that's the list of ts paths
        for i, plist in enumerate(plists):  
            temp[i],units1,ptype1=d.read_rts(plist) 
            #print(i, plist)
            
    if C_part == 'ELEV':
        cat = catdf[catdf['C']=='ELEV']
        plists=d.get_pathnames(cat) # that's the list of ts paths
        for i, plist in enumerate(plists):  
            temp[i],units1,ptype1=d.read_rts(plist) 
            #print(i, plist)    
    #A = cat.B.str.replace('GLC2','GLC',regex=True)
    #B = A.str.replace('VCU2','VCU',regex=True)
    temp.columns = cat.B
    
    #temp = temp[common_sites]

    print(dss_file, C_part)
    temp.info()
    return temp


In [6]:
#%% Load data
# leg1 = 'Modeled Historical (H2M)'
leg1 = 'Modeled Historical (H1Z)'
scenario = 'FPV2Ma'

# For now I'll change it by hand but later will read it from echo file
gatef = {'name' : ['GrantLine','MiddleRiver','OldRiver'],
         'width' : [5,5,5],
         'C' : [0.8,0.8,0.8],
         'bottom_elev' :[-6,-5,-7],
         'ID': ['GLC','MID','OLD'],
         'station' : ['DGL','MHO','OLD']}

fig_folder = r'postpro\fig'
fig_name = scenario+'_compv1_%s.png'
save_fig = True
year = '2021'
leg2 = scenario

stime = '%s-01-01'%year
etime = '%s-12-31'%year

# Scenario Developped
hydro2 = r'.\studies\{}\output\{}_hydro.dss'.format(scenario, scenario)
SDG2 = r'.\studies\{}\output\{}_SDG.dss'.format(scenario, scenario)


# Baseline run for hydro comparison
# hydro1 = r'.\studies\H2M\output\H2M_hydro.dss'
#gtm1 = r'../plan_studies/PRIO_og/output/PRIO_og_gtm.dss'
# SDG1 = r'..\studies\H2M\output\H2M_SDG.dss'

hydro1 = r'.\studies\H1Z\output\H1Z_0_hydro.dss'
SDG1 = r'..\studies\H1Z\output\H1Z_0_CCF.dss'



In [7]:
# load data for Velocity analysis
C_part = 'STAGE' # 
elev_list =['MID_GATE_UP','MID_GATE_DOWN','GLC_GATE_UP','GLC_GATE_DOWN','OLD_GATE_UP','OLD_GATE_DOWN']
flow_list = ['GLC_FLOW_FISH','MID_FLOW_FISH','MID_FLOW_GATE','OLD_FLOW_FISH','OLD_FLOW_GATE']

#sc1_stage = load_dss(SDG1,elev_list, C_part)
sc2_stage = load_dss(SDG2,elev_list, C_part)
sc2_stage = sc2_stage[stime:etime]

C_part = 'DEVICE-FLOW'
#sc1_flow= load_dss(SDG1,flow_list, C_part)
sc2_flow = load_dss(SDG2,flow_list, C_part)
sc2_flow = sc2_flow[stime:etime]

# Load data for WL compliance
C_part = 'STAGE' # 
stn_name = ['MHO','DGL','OLD']
stn_thr = [2.5,2.3,2.3]

sc1_wl = load_dss(hydro1,stn_name, C_part)
sc2_wl  = load_dss(hydro2,stn_name, C_part)
sc1_wl = sc1_wl[stime:etime]
sc2_wl = sc2_wl[stime:etime]

# Load Gate OP
C_part = 'ELEV' # 
stn_list =['MID_GATEOP','GLC_GATEOP','OLD_GATEOP']
#sc1_flow= load_dss(SDG1,flow_list, C_part)
sc2_gateop = load_dss(SDG2,stn_list, C_part)
sc2_gateop = sc2_gateop[stime:etime]
sc2_gateop.rename(columns={'MID_GATEOP':'MHO','GLC_GATEOP':'DGL','OLD_GATEOP':'OLD'}, inplace=True) # renaming to make it easier later.



.\studies\FPV2Ma\output\FPV2Ma_SDG.dss STAGE
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 280512 entries, 2016-01-01 00:00:00 to 2023-12-31 23:45:00
Freq: 15min
Data columns (total 6 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   GLC_GATE_DOWN  280512 non-null  float64
 1   GLC_GATE_UP    280512 non-null  float64
 2   MID_GATE_DOWN  280512 non-null  float64
 3   MID_GATE_UP    280512 non-null  float64
 4   OLD_GATE_DOWN  280512 non-null  float64
 5   OLD_GATE_UP    280512 non-null  float64
dtypes: float64(6)
memory usage: 15.0 MB
.\studies\FPV2Ma\output\FPV2Ma_SDG.dss DEVICE-FLOW
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 280512 entries, 2016-01-01 00:00:00 to 2023-12-31 23:45:00
Freq: 15min
Data columns (total 5 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   GLC_FLOW_FISH  280512 non-null  float64
 1   MID_FLOW_FISH  280512 non-null  float64
 2   MID_FL

In [9]:
thr_vel = 8
mask = (sc2_flow.index.month>=5) & (sc2_flow.index.month<12) # time when gate are in operation

# Make velocity df

def calc_vel(flow,stage_up, bottom_elev, width):
    #velocity is flow/cross-section
    xs = (stage_up-bottom_elev)*width
    vel = flow/xs
    return vel

vel=pd.DataFrame()
vel['GLC'] = calc_vel(sc2_flow.GLC_FLOW_FISH,sc2_stage.GLC_GATE_UP,gatef['bottom_elev'][0], gatef['width'][0] )
vel['MID'] = calc_vel(sc2_flow.MID_FLOW_FISH,sc2_stage.MID_GATE_UP,gatef['bottom_elev'][1], gatef['width'][1] )
vel['OLD'] = calc_vel(sc2_flow.OLD_FLOW_FISH,sc2_stage.OLD_GATE_UP,gatef['bottom_elev'][2], gatef['width'][2] )


In [15]:
vel

Unnamed: 0,GLC,MID,OLD
2021-01-01 00:00:00,0.87,0.33,0.65
2021-01-01 00:15:00,0.88,0.32,0.64
2021-01-01 00:30:00,0.88,0.32,0.65
2021-01-01 00:45:00,0.96,0.31,0.86
2021-01-01 01:00:00,0.99,0.31,0.85
2021-01-01 01:15:00,0.99,0.35,0.75
2021-01-01 01:30:00,0.97,0.35,0.68
2021-01-01 01:45:00,0.95,0.34,0.66
2021-01-01 02:00:00,0.93,0.33,0.61
2021-01-01 02:15:00,0.9,0.3,0.54


In [34]:
j = 0
station = "DGL"
gate_loc = gatef['ID'][j]
name = gatef['name'][j]
start_zoom = '%s-06-21'%year
end_zoom = '%s-08-28'%year
gate_up = (sc2_gateop.iloc[:,j] >= 10)
vel_zoom = vel[gate_loc][start_zoom:end_zoom]
# print(vel_zoom)
vel_zoom_df = pd.DataFrame(vel_zoom)
vel_zoom_df['datetime'] = vel_zoom_df.index
vel_zoom_df = vel_zoom_df.reset_index(drop=True)
vel_zoom_df.to_csv("test_vel.csv")
#To count proportion

vel_zoom_df['Velocity_Category'] = np.where(vel_zoom_df['GLC'] >= 8, "Over 8ft/s", "Under 8ft/s")
#.shift shift value down and compare each value with the previous row; increase value when rows are different
vel_zoom_df['consecutive_groups'] = (vel_zoom_df['Velocity_Category'] != vel_zoom_df['Velocity_Category'].shift()).cumsum()
# print(vel_zoom_df)
vel_zoom_df['min_datetime'] = vel_zoom_df.groupby('consecutive_groups')['datetime'].transform('min')
vel_zoom_df['max_datetime'] = vel_zoom_df.groupby('consecutive_groups')['datetime'].transform('max')
vel_zoom_df['date'] = vel_zoom_df['datetime'].dt.date.astype(str)
consecutive_streaks_vel = vel_zoom_df.groupby(['consecutive_groups', 'Velocity_Category', 'min_datetime', 'max_datetime']).size().reset_index(name='count')
consecutive_streaks_vel['streak_duration'] = consecutive_streaks_vel['count'] * 15 / 60
consecutive_streaks_vel_clean = consecutive_streaks_vel.drop(['consecutive_groups', 'Velocity_Category', 'max_datetime'], axis=1)
merged_df = pd.merge(vel_zoom_df, consecutive_streaks_vel_clean,left_on="min_datetime", right_on="min_datetime")
# print(merged_df.head(5))
over_streaks = consecutive_streaks_vel[consecutive_streaks_vel['Velocity_Category'] == 'Over 8ft/s']
under_streaks = consecutive_streaks_vel[consecutive_streaks_vel['Velocity_Category'] == 'Under 8ft/s']

# Total duration for over and under 8ft/s
total_over_duration = over_streaks['count'].sum()
total_under_duration = under_streaks['count'].sum()

over_streak_duration =  over_streaks['count'] * 15 / 60
under_streak_duration = under_streaks['count'] * 15 / 60

# Group by the Velocity Category and count the number of days in each category
vel_count_df = vel_zoom_df.groupby('Velocity_Category').size().reset_index(name='count')

total_time_df = pd.DataFrame({
    'Velocity_Category': ['Over 8ft/s', 'Under 8ft/s'],
    'Total Hours': [total_over_duration * 15 / 60, total_under_duration * 15 / 60]
})

consecutive_streaks_vel.rename(columns={
    "Velocity_Category": "Status"},
    inplace = True)
# print(consecutive_streaks_vel)

In [35]:
gate_up = (sc2_gateop.iloc[:,0] >= 10)
gate_up = gate_up[start_zoom:end_zoom]
gate_up_df = pd.DataFrame(gate_up)
gate_up_df['datetime'] = gate_up_df.index
gate_up_df = gate_up_df.reset_index(drop=True)
gate_up_df['consecutive_groups'] = (gate_up_df['DGL'] != gate_up_df['DGL'].shift()).cumsum()
gate_up_df['min_datetime'] = gate_up_df.groupby('consecutive_groups')['datetime'].transform('min')
gate_up_df['max_datetime'] = gate_up_df.groupby('consecutive_groups')['datetime'].transform('max')
consecutive_streaks = gate_up_df.groupby(['consecutive_groups', 'DGL', 'min_datetime', 'max_datetime']).size().reset_index(name='count')
consecutive_streaks['streak_duration'] = consecutive_streaks['count'] * 15 / 60
consecutive_streaks_clean = consecutive_streaks.drop(['DGL', 'consecutive_groups', 'max_datetime'], axis = 1)
merged_gate_df = pd.merge(gate_up_df, consecutive_streaks_clean,left_on="min_datetime", right_on="min_datetime")
merged_gate_df = merged_gate_df.drop(['consecutive_groups'], axis=1)
merged_gate_df = merged_gate_df.rename(columns={"min_datetime": "gate_min_datetime", 
                                                "max_datetime": "gate_max_datetime",
                                                "count": "gate_count",
                                                "streak_duration": "gate_streak_duration"})
# merged_gate_df['date'] = merged_gate_df['datetime'].dt.date.astype(str)
# consecutive_streaks = consecutive_streaks[consecutive_streaks['date'] == target_date]
# consecutive_streaks = consecutive_streaks[consecutive_streaks['date'] == '2021-07-21']
consecutive_streaks.rename(columns={
    "DGL": "Status"},
    inplace = True)


In [36]:
gate_up

2021-06-21 00:00:00    False
2021-06-21 00:15:00    False
2021-06-21 00:30:00    False
2021-06-21 00:45:00    False
2021-06-21 01:00:00    False
2021-06-21 01:15:00    False
2021-06-21 01:30:00    False
2021-06-21 01:45:00    False
2021-06-21 02:00:00    False
2021-06-21 02:15:00    False
2021-06-21 02:30:00    False
2021-06-21 02:45:00    False
2021-06-21 03:00:00    False
2021-06-21 03:15:00    False
2021-06-21 03:30:00    False
2021-06-21 03:45:00    False
2021-06-21 04:00:00    False
2021-06-21 04:15:00     True
2021-06-21 04:30:00     True
2021-06-21 04:45:00     True
2021-06-21 05:00:00     True
2021-06-21 05:15:00     True
2021-06-21 05:30:00     True
2021-06-21 05:45:00     True
2021-06-21 06:00:00     True
2021-06-21 06:15:00     True
2021-06-21 06:30:00     True
2021-06-21 06:45:00     True
2021-06-21 07:00:00     True
2021-06-21 07:15:00     True
2021-06-21 07:30:00     True
2021-06-21 07:45:00     True
2021-06-21 08:00:00     True
2021-06-21 08:15:00     True
2021-06-21 08:

In [37]:
joint_df = pd.concat([consecutive_streaks, consecutive_streaks_vel])

chart = alt.Chart(joint_df).mark_rect(size=20, opacity=0.7).encode(
    y=alt.Y('Status:O', title='Status', axis=alt.Axis(titleFontSize=14)),
    x=alt.X('min_datetime:T', title='Date Time', axis=alt.Axis(titleFontSize=14)),
    x2='max_datetime:T',
    color="Status:O",
    tooltip=[
        alt.Tooltip('Status:O', title='Status'),
        # alt.Tooltip('min_datetime:T', title='Start Time'),
        # alt.Tooltip('max_datetime:T', title='End Time'),
        alt.Tooltip('streak_duration:Q', title='Duration(hour)')]
).properties(
    title={
        "text": ["Gate And Water Velocity Status Throughout the Day"],
        "subtitle": "Gate Open/Close, Water Velocity Over/Under 8m/s Over Time",
        "fontSize": 16,
        "subtitleFontSize": 12,
        "anchor": "start",
        "color": "black"
    },
    width=600,
    height=400
).configure_axis(
    grid=True,
    labelFontSize=12,
    titleFontSize=14,
).configure_title(
    fontSize=18,
    anchor='start'
)

# Show the chart
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [38]:
full_merged_df = pd.merge(merged_df, merged_gate_df, left_on="datetime", right_on="datetime")
full_merged_df['time_unit'] = 0.25
full_merged_df['DGL'] = np.where(full_merged_df['DGL'], "Closed", "Open")
xaxis_values = full_merged_df['date'].unique()
# full_merged_df.to_csv('test_vel.csv')
full_merged_df.head()
print(full_merged_df['date'].unique())

['2021-06-21' '2021-06-22' '2021-06-23' '2021-06-24' '2021-06-25'
 '2021-06-26' '2021-06-27' '2021-06-28' '2021-06-29' '2021-06-30'
 '2021-07-01' '2021-07-02' '2021-07-03' '2021-07-04' '2021-07-05'
 '2021-07-06' '2021-07-07' '2021-07-08' '2021-07-09' '2021-07-10'
 '2021-07-11' '2021-07-12' '2021-07-13' '2021-07-14' '2021-07-15'
 '2021-07-16' '2021-07-17' '2021-07-18' '2021-07-19' '2021-07-20'
 '2021-07-21' '2021-07-22' '2021-07-23' '2021-07-24' '2021-07-25'
 '2021-07-26' '2021-07-27' '2021-07-28' '2021-07-29' '2021-07-30'
 '2021-07-31' '2021-08-01' '2021-08-02' '2021-08-03' '2021-08-04'
 '2021-08-05' '2021-08-06' '2021-08-07' '2021-08-08' '2021-08-09'
 '2021-08-10' '2021-08-11' '2021-08-12' '2021-08-13' '2021-08-14'
 '2021-08-15' '2021-08-16' '2021-08-17' '2021-08-18' '2021-08-19'
 '2021-08-20' '2021-08-21' '2021-08-22' '2021-08-23' '2021-08-24'
 '2021-08-25' '2021-08-26' '2021-08-27' '2021-08-28']


In [39]:
# summary_stats = (full_merged_df.groupby(["date", "Velocity_Category"]).
#     agg(
#         total_velocity_duration = ("time_unit", "sum")
#     )).reset_index()
# print(summary_stats)
# print(full_merged_df)
daily_velocity = full_merged_df.groupby(["Velocity_Category", "date"])["time_unit"].sum().reset_index()
avg_daily_velocity = daily_velocity.groupby("Velocity_Category")['time_unit'].mean().reset_index()
print(daily_velocity.head(15))
print(avg_daily_velocity) 
# print(avg_velocity)

   Velocity_Category date         time_unit
0   Over 8ft/s        2021-06-21 10.50     
1   Over 8ft/s        2021-06-22 10.75     
2   Over 8ft/s        2021-06-23 10.50     
3   Over 8ft/s        2021-06-24 10.25     
4   Over 8ft/s        2021-06-25  9.50     
5   Over 8ft/s        2021-06-26  9.75     
6   Over 8ft/s        2021-06-27 10.50     
7   Over 8ft/s        2021-06-28 10.75     
8   Over 8ft/s        2021-06-29 10.50     
9   Over 8ft/s        2021-06-30  9.75     
10  Over 8ft/s        2021-07-01  9.25     
11  Over 8ft/s        2021-07-02  9.75     
12  Over 8ft/s        2021-07-03  8.75     
13  Over 8ft/s        2021-07-04  8.75     
14  Over 8ft/s        2021-07-05  8.75     
  Velocity_Category  time_unit
0   Over 8ft/s       9.61     
1  Under 8ft/s      14.39     


In [40]:
drop_down_date = full_merged_df['date'].unique()
date_dropdown = alt.binding_select(options=drop_down_date, name="Select Date:")
date_selection = alt.selection_single(fields=["date"], bind=date_dropdown, empty = "none")
interval = alt.selection_interval(encodings=['x'])
# input_dropdown = alt.binding_select(options=drop_down_date, name='Date')
# drop_selection = alt.selection_interval(fields=["datetime"], bind=input_dropdown)

base = alt.Chart(full_merged_df).mark_line(color = "darkblue").encode(
    x=alt.X('yearmonthdatehoursminutes(datetime):T', title='Datetime', axis=alt.Axis(format='%b %d, %Y', 
                                                                                     labelAngle=-45,
                                                                                     title='Date')),
    y=alt.Y('GLC:Q', title='Velocity (ft/s)'),
).add_selection(
    interval,
    # drop_selection
).properties(
    title="Flow Velocity and Gate Status Zoomed"
)
# avg_line_velocity = alt.Chart(avg_daily_velocity).mark_rule(color='red', strokeDash=[5, 5]).encode(
#     y=alt.Y('time_unit:Q', title="Average Time (Hours)"),
#     detail='Velocity_Category:N'  # Ensure the line is drawn for each category
# )
vel_bar_chart = alt.Chart(full_merged_df).mark_bar(color="orange").encode(
    x=alt.X('Velocity_Category:N', title='Velocity Category'),
    y=alt.Y('sum(time_unit):Q', title='Total Velocity Duration (Hours)'),
    # color="blue",
    # color=alt.Color('DGL:N', title='Gate Status', scale=alt.Scale(scheme='dark2')),
    tooltip = alt.Tooltip('sum(time_unit):Q', title="Total Velocity Duration (Hours)")
).properties(
    width=300,
    height=300,
    title="Comparison of Velocity Category Duration",
    # fontSize = 16
).transform_filter(
    interval
    
).interactive()

gate_bar_chart = alt.Chart(full_merged_df).mark_bar(color = "steelblue").encode(
    x=alt.X('DGL:N', title='Gate Status'),
    y=alt.Y('sum(time_unit):Q', title='Total Gate Status Duration (Hours)'),
    # color=alt.Color('DGL:N', title='Gate Status', scale=alt.Scale(scheme='dark2')),
    tooltip = alt.Tooltip('sum(time_unit):Q', 
    title="Total Gate Status Duration (Hours)")
).properties(
    width=300,
    height=300,
    title="Comparison of Gate Status Duration",
    # fontSize = 16
).transform_filter(
    interval
).interactive()

area_dgl_true = alt.Chart(full_merged_df).mark_rect(
    # opacity=0.006,
    color='orange'
).encode(
    x='gate_min_datetime:T',
    x2='gate_max_datetime:T',
    opacity=alt.value(0.008)
).transform_filter(
    alt.datum.DGL == "Closed"
)
yrule = (
    alt.Chart().mark_rule(strokeDash=[12, 6], size=2, color = 'red').encode(
        y=alt.datum(8))
).properties(
    width=500,
    height=300
).encode(
        tooltip=alt.TooltipValue('8 ft/s Threshold')  # Static tooltip for the rule line
    )

nearest = alt.selection_point(nearest=True, on="pointerover",
                              fields=["datetime"], empty=False)
points = base.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
# # Draw a rule at the location of the selection
rules = alt.Chart(full_merged_df).transform_calculate(
    FlowVelocityDuration = "'Flow ' + datum.Velocity_Category + ' duration is ' + datum.streak_duration + ' hours'",
    GateStatusDuration = "'Gate ' + datum.DGL + ' duration is ' + datum.gate_streak_duration + ' hours'"
).mark_rule(color="gray").encode(
    x="datetime:T",
    opacity=alt.condition(nearest, alt.value(0.3), alt.value(0)),
    tooltip=[alt.Tooltip('yearmonthdatehoursminutes(datetime):T', title='Datetime'),
             alt.Tooltip('GLC:Q', title= "Velocity (ft/s)", format=".2f"),
             alt.Tooltip('FlowVelocityDuration:N', title="Flow Velocity Duration"),
             alt.Tooltip('GateStatusDuration:N', title="Gate Status Duration")
             ],
).add_params(nearest)

vel_text = alt.Chart(full_merged_df).mark_text(align='right').encode(
    y=alt.Y('stat:N', axis=None),
    text=alt.Text('summary:N')
).transform_filter(
    interval
).transform_aggregate(
    max_velocity='max(GLC)',  
    min_velocity='min(GLC)',
    avg_velocity='mean(GLC)'
).transform_fold(
    ['max_velocity', 'min_velocity', 'avg_velocity'],  # Separate each statistic
    as_=['stat', 'value']
).transform_calculate(
    summary='datum.stat + ": " + format(datum.value, ".2f")'
)
velocity = vel_text.encode(text='summary:N').properties(
    title=alt.Title(text='Flow Velocity Summary', align='center')
)

vel_duration_text = alt.Chart(full_merged_df).mark_text(align='right').encode(
    y=alt.Y('Velocity_Category:N', axis=None),
    text=alt.Text('summary:N')
).transform_filter(
    interval
).transform_aggregate(
    total_time='sum(time_unit)',
    groupby=["Velocity_Category"]
).transform_fold(
    ['total_time'],  # Separate each statistic
    as_=['stat', 'value']
).transform_calculate(
    summary='datum.Velocity_Category + ": " + format(datum.total_time, ".2f") + " hours"'
)
velocity_duration = vel_duration_text.encode(text='summary:N').properties(
    title=alt.Title(text='Velocity Duration Summary', align='center')
)

gate_duration_text = alt.Chart(full_merged_df).mark_text(align='right').encode(
    y=alt.Y('DGL:N', axis=None),
    text=alt.Text('summary:N')
).transform_filter(
    interval
).transform_aggregate(
    total_time='sum(time_unit)',
    groupby=["DGL"]
).transform_fold(
    ['total_time'],  # Separate each statistic
    as_=['stat', 'value']
).transform_calculate(
    summary='datum.DGL + ": " + format(datum.total_time, ".2f") + " hours"'
)
gate_duration = gate_duration_text.encode(text='summary:N').properties(
    title=alt.Title(text='Gate Status Duration Summary', align='center')
)



combined_text = alt.vconcat(
    velocity,
    velocity_duration,
    gate_duration,
    # velocity_daily_duration
)
# vel_chart_with_avg = vel_bar_chart + avg_line_velocity
combined_bar_charts = alt.hconcat(
    vel_bar_chart,
    gate_bar_chart 
)
combined_chart = alt.vconcat(
    alt.layer(base, points, yrule, rules, area_dgl_true),
    # upper,
    combined_bar_charts)
combined_chart_with_text = alt.hconcat(combined_chart, combined_text).configure_title(
        fontSize=17,
        color = "black",
        font='Arial'
    )
    
combined_chart_with_text
#add summary stats/per day
#add average line to bar chart
#fix datetime in time series
#selector by day
#Generate weekly summary 
#someway to show amount of time selected (start/end date)



MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000).

See https://altair-viz.github.io/user_guide/large_datasets.html for information on how to plot large datasets, including how to install third-party data management tools and, in the right circumstance, disable the restriction

alt.HConcatChart(...)

In [41]:
import datetime as dt
print(full_merged_df.info())
full_merged_df['week'] = full_merged_df['datetime'].dt.isocalendar().week
full_merged_df['week'].unique()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6624 entries, 0 to 6623
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   GLC                   6624 non-null   float64       
 1   datetime              6624 non-null   datetime64[ns]
 2   Velocity_Category     6624 non-null   object        
 3   consecutive_groups    6624 non-null   int32         
 4   min_datetime          6624 non-null   datetime64[ns]
 5   max_datetime          6624 non-null   datetime64[ns]
 6   date                  6624 non-null   object        
 7   count                 6624 non-null   int64         
 8   streak_duration       6624 non-null   float64       
 9   DGL                   6624 non-null   object        
 10  gate_min_datetime     6624 non-null   datetime64[ns]
 11  gate_max_datetime     6624 non-null   datetime64[ns]
 12  gate_count            6624 non-null   int64         
 13  gate_streak_durati

<IntegerArray>
[25, 26, 27, 28, 29, 30, 31, 32, 33, 34]
Length: 10, dtype: UInt32

In [42]:
weekly_velocity = full_merged_df.groupby(["week", "Velocity_Category"])["time_unit"].sum().reset_index()
weekly_velocity

Unnamed: 0,week,Velocity_Category,time_unit
0,25,Over 8ft/s,71.75
1,25,Under 8ft/s,96.25
2,26,Over 8ft/s,67.5
3,26,Under 8ft/s,100.5
4,27,Over 8ft/s,61.25
5,27,Under 8ft/s,106.75
6,28,Over 8ft/s,71.0
7,28,Under 8ft/s,97.0
8,29,Over 8ft/s,69.25
9,29,Under 8ft/s,98.75


In [67]:
summary_stats_dgl


Unnamed: 0,week,date,DGL,total_gate_duration
0,25,2021-06-21,Closed,16.75
1,25,2021-06-21,Open,7.25
2,25,2021-06-22,Closed,17.0
3,25,2021-06-22,Open,7.0
4,25,2021-06-23,Closed,16.25
5,25,2021-06-23,Open,7.75
6,25,2021-06-24,Closed,16.25
7,25,2021-06-24,Open,7.75
8,25,2021-06-25,Closed,16.25
9,25,2021-06-25,Open,7.75


In [81]:
# print(full_merged_df.head())
daily_velocity = full_merged_df.groupby(["week", "date", "Velocity_Category"])["time_unit"].sum().reset_index()
avg_daily_velocity = daily_velocity.groupby("Velocity_Category")['time_unit'].mean().reset_index()

daily_gate = full_merged_df.groupby(["week", "date","DGL"])["time_unit"].sum().reset_index()
avg_daily_gate = daily_gate.groupby("DGL")['time_unit'].mean().reset_index()

# print(daily_gate.head(15))
# print(avg_daily_velocity) 
unique_dates = full_merged_df['date'].unique()

avg_line_velocity = alt.Chart(avg_daily_velocity).mark_rule(strokeWidth=2, color='red', strokeDash=[5, 5]).encode(
    y=alt.Y('time_unit:Q'),
    detail='Velocity_Category:N',
    tooltip=alt.Tooltip('time_unit:Q',  format=".2f")  
)
avg_line_gate = alt.Chart(avg_daily_gate).mark_rule(strokeWidth=4, color='red', strokeDash=[5, 5]).encode(
    y=alt.Y('time_unit:Q'),
    detail='DGL:N',
    # alt.Tooltip('GLC:Q', title= "Velocity (ft/s)", format=".2f")
    tooltip=alt.Tooltip('time_unit:Q',  format=".2f")  # Static tooltip for the rule line
    ).interactive()

summary_stats_vel = (full_merged_df.groupby(["week", "date", "Velocity_Category"]).
    agg(
        total_velocity_duration = ("time_unit", "sum")
    )).reset_index()

summary_stats_dgl = (full_merged_df.groupby(["week", "date", "DGL"]).
    agg(
        total_gate_duration = ("time_unit", "sum")
    )).reset_index()

unique_weeks = list(full_merged_df.week.unique())
week_dropdown = alt.binding_select(options=unique_weeks, name="Select Week:")
week_selection = alt.selection_single(fields=["week"], bind=week_dropdown, empty = "none")

# Create the bar graph
vel_bar_chart = alt.Chart(summary_stats_vel).mark_bar(color="orange").encode(
        x=alt.X("date", title="Velocity Category", axis=alt.Axis()),
        y=alt.Y("total_velocity_duration:Q", title="Hours"),
        color="Velocity_Category:N",
        # column="Velocity_Category:N",
        tooltip=["Velocity_Category:N", "total_velocity_duration:Q"],
# ).add_selection(week_selection
).properties(
    title="Daily Velocity Over/Under 8 ft/s Duration Summary", width=200, height=400
)
vel_bar_chart
# vel_chart_with_avg = vel_bar_chart + avg_line_velocity

gate_bar_chart = alt.Chart(summary_stats_dgl).mark_bar(color="steelblue").encode(
        x=alt.X("date", title="Gate Status",axis=alt.Axis()),
        y=alt.Y("total_gate_duration:Q", title="Hours"),
        color="DGL:N",
        # column="DGL:N",
        tooltip=["DGL:N", "total_gate_duration:Q"],
# ).add_selection(week_selection
).properties(
    title="Daily Gate Status Duration Summary", width=200, height=400
)
# gate_chart_with_avg = gate_bar_chart + avg_line_gate
combined_bar_charts = alt.hconcat(
    gate_bar_chart,
    vel_bar_chart 
)
combined_bar_charts
#create weekly time series with bars
#eventually will have a 100 year time series
#whats the day thats the most above 8/fts
#look at daily variability/similarity over the entire summer (stacked bar) 
#gate open = low flow
#gate close = high flow (water pushed to passage)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [17]:
clean_merged_df = full_merged_df.drop(["gate_min_datetime",
                                      "gate_max_datetime",
                                      "consecutive_groups",
                                      "min_datetime",
                                      "max_datetime",
                                      "gate_count",
                                      "date",
                                      "count"],axis=1)
clean_merged_df_renamed = clean_merged_df.rename(columns={
    "GLC": "velocity",
    "DGL": "gate_status",
    "gate_streak_duration": "gate_open_close_duration_hr",
    "streak_duration":"velocity_over_under_8_duration_hr",
    "Velocity_Category": "velocity_over_under_8_status"
})
# print(clean_merged_df_renamed.columns)
clean_merged_df_renamed = clean_merged_df_renamed[["datetime", 
                                                   "velocity", 
                                                   "velocity_over_under_8_status",
                                                   "velocity_over_under_8_duration_hr",
                                                   "gate_status",
                                                   "gate_open_close_duration_hr"]]
print(clean_merged_df_renamed.head(5))

  datetime             velocity velocity_over_under_8_status  velocity_over_under_8_duration_hr gate_status  gate_open_close_duration_hr
0 2021-07-21 00:00:00 -0.59      Under 8ft/s                 5.25                                Closed     4.25                        
1 2021-07-21 00:15:00 -0.63      Under 8ft/s                 5.25                                Closed     4.25                        
2 2021-07-21 00:30:00 -0.67      Under 8ft/s                 5.25                                Closed     4.25                        
3 2021-07-21 00:45:00 -0.71      Under 8ft/s                 5.25                                Closed     4.25                        
4 2021-07-21 01:00:00 -0.73      Under 8ft/s                 5.25                                Closed     4.25                        
