## Imports

In [1]:
# Imports -- Python 3.10
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd

from matplotlib import ticker
from importlib import reload
from dataretrieval import nwis
from datetime import timedelta

# Custom libs
import Src.func as fn
import Src.classes as cl
# Re-import the project modules so that edits made to them while the
# notebook kernel is running take effect without restarting the kernel.
reload(fn)
reload(cl)

# Turn off pandas' chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None



## Sub_df 1: HMF Events Breakdown Dataset
The following cell generates a dataset with one row per HMF event for each input gauge, recording the site number, event number, start and end dates, cumulative HMF, and duration in days.

In [19]:
# Run configuration shared by the cells below
date_range = 30   # length of the analysis window, in years
quantile = 90     # flow percentile; divided by 100 before the threshold is computed
test_limit = 2    # the event loop stops once this many sites have been processed

# The national metrics workbook is keyed by the window length and quantile
metrics_path = f'Prelim_Data/_National_Metrics/National_Metrics_{date_range}_{quantile}.xlsx'
site_list_df = pd.read_excel(metrics_path, dtype=fn.DATASET_DTYPES)

# Keep only the gauges flagged as valid and pull out their site numbers
is_valid = site_list_df['valid'] == True
site_list_df = site_list_df.loc[is_valid]
site_list = site_list_df['site_no'].tolist()
print(f'# of sites: {len(site_list)}')

# of sites: 4241


In [32]:
# One dict per completed HMF event across all sites. Collected in a list and
# turned into a DataFrame once at the end, instead of concatenating onto a
# growing DataFrame for every event (which re-copies the whole table each time).
event_records = []
event_columns = ['site_no', 'event', 'start', 'end', 'hmf', 'duration']

# This is a cut down version of single_site_analysis() focused only on event metrics
# NOTE(review): the loop runs on two hard-coded gauges, not on site_list -- confirm
# whether this is a leftover test setting before a full run.
for i, site in enumerate(['13335050', '11447650']):
    if i == test_limit:
        break
    df = nwis.get_record(sites=site, service=fn.SERVICE, parameterCD=[fn.PARAM_CODE, fn.TIDAL_CODE], start=fn.DEFAULT_START, end=fn.DEFAULT_END)
    df = df.reset_index()

    # Only run on valid sites so this should never be the case but check anyways
    if df.empty:
        continue

    # Some gauges report discharge under a radar-sensor column; normalise the name
    if '00060_radar sensor_Mean' in df.columns and '00060_Mean' not in df.columns:
        df.rename(columns={'00060_radar sensor_Mean': '00060_Mean'}, inplace=True)

    df = fn.merge_tidal(df)

    # Cropping to date range
    date_threshold = pd.to_datetime(fn.DEFAULT_END).date() - timedelta(days=365.25 * date_range)
    df = df[df['datetime'].dt.date >= date_threshold]

    # Nothing left inside the analysis window: skip the site
    if df.empty:
        continue

    threshold = fn.calc_threshold(df, (quantile / 100))
    _, hmf_series_cont = fn.filter_hmf(df, threshold)
    hmf_series_cont = fn.convert_hmf(hmf_series_cont, threshold)

    # Without any rows there is no last date to extend from (iloc[-1] would raise)
    if hmf_series_cont.empty:
        continue

    hmf_series_cont['00060_Mean'] = hmf_series_cont['00060_Mean'] * fn.CUBIC_FT_KM_FACTOR
    # Overwritten on every site, so the file holds the last processed site only
    hmf_series_cont.to_csv('hmf_series_cont.csv')

    # Insert a dummy row at the end to ensure the last event is captured
    last = hmf_series_cont['datetime'].iloc[-1] + timedelta(days=1)
    insert = {'datetime': last, '00060_Mean': 0, 'site_no': site}
    hmf_series_cont = pd.concat([hmf_series_cont, pd.DataFrame(insert, index=[0])]).reset_index(drop=True)

    event_count = hmf = duration = 0
    event = False
    start = end = None
    # Walk the series row by row; an event is a run of consecutive rows with flow > 0.
    # Rows that are neither > 0 nor == 0 (e.g. NaN) leave the current state untouched.
    for timestamp, flow in zip(hmf_series_cont['datetime'], hmf_series_cont['00060_Mean']):
        if flow > 0:
            if event:
                # Current event continuing
                hmf += flow
                duration += 1
            else:
                # New event starting
                hmf = flow
                start = timestamp.date()
                duration = 1
                event_count += 1
                event = True
        elif flow == 0 and event:
            # Current event ending: it ran through the day before this zero-flow row
            end = timestamp.date() - timedelta(days=1)
            event_records.append({'site_no': site, 'event': event_count, 'start': start, 'end': end, 'hmf': hmf, 'duration': duration})
            event = False

# Passing the columns explicitly keeps the header even when no events were found
df_results = pd.DataFrame(event_records, columns=event_columns)
df_results.to_csv('events_subdf.csv')

## Sub_df 2: Annual Metrics Breakdown Dataset
The following generates a dataset including the annual metrics for each input gauge