In [1]:
import pandas as pd
import numpy as np
import xarray as xr



In [2]:
# function to test
def qaqc_precip_logic_accum_amounts(df):
    """
    Flag precipitation accumulations that are inconsistent across reporting durations.

    A shorter accumulation window should not report more precipitation than a longer
    one, i.e. pr_5min <= pr_1h <= pr_24h. Only strict violations are flagged; equal
    amounts are accepted. The check only runs when 2 or more duration-specific
    precipitation columns are present.

    Variables:
        pr:          precipitation accumulated since last record (not checked here)
        pr_5min:     precipitation accumulated in last 5 minutes
        pr_1h:       precipitation accumulated in last hour
        pr_24h:      precipitation accumulated from last 24 hours
        pr_localmid: precipitation accumulated from local midnight (never compared)

    Flags written (see era_qaqc_flag_meanings.csv for their meanings):
        pr_5min_eraqc = 15 where pr_5min > pr_1h or pr_5min > pr_24h
        pr_1h_eraqc   = 16 where pr_1h < pr_5min
        pr_1h_eraqc   = 15 where pr_1h > pr_24h (applied after, so it replaces a 16)
        pr_24h_eraqc  = 14 where pr_24h < pr_5min or pr_24h < pr_1h

    Parameters
    ----------
    df : pandas.DataFrame
        Station data. Columns whose name contains 'pr_' (and none of 'qc',
        'duration', 'localmid') are treated as precipitation duration variables.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; flags are written in place. A missing
        '<var>_eraqc' column is created (filled with NaN) for each checked variable.

    Notes
    -----
    Comparisons against NaN evaluate to False, so a row with a missing value in one
    variable is never flagged by a rule that involves that variable.
    Where a rule fires, any value already in the flag column is overwritten.
    """
    # determine which precipitation duration vars are present:
    #   'pr_' (with underscore) excludes the plain 'pr' variable
    #   'qc' excludes raw/eraqc columns, 'duration' excludes duration columns,
    #   'localmid' excludes pr_localmid, which must not be compared to the others
    excluded_tags = ('qc', 'duration', 'localmid')
    pr_vars = [
        col for col in df.columns
        if isinstance(col, str)
        and 'pr_' in col
        and not any(tag in col for tag in excluded_tags)
    ]

    if len(pr_vars) == 0: # station does not report any duration variable, bypass
        print('station does not report a precipitation duration variable - bypassing precip logic check') # testing
        return df

    if len(pr_vars) == 1: # nothing to compare against, no need for amount check
        print('station does not report multiple precipitation duration variables - bypassing precip logic check') # testing
        return df

    print(pr_vars)

    # (flagged variable, label used in the report, rules)
    # each rule is (shorter window, longer window, flag): the flag is written on the
    # flagged variable wherever the shorter window exceeds the longer one.
    # Rule order matters: a later rule overwrites an earlier flag on the same row.
    checks = (
        ('pr_5min', '5min', (('pr_5min', 'pr_1h', 15), ('pr_5min', 'pr_24h', 15))),
        ('pr_1h', '1h', (('pr_5min', 'pr_1h', 16), ('pr_1h', 'pr_24h', 15))),
        ('pr_24h', '24h', (('pr_5min', 'pr_24h', 14), ('pr_1h', 'pr_24h', 14))),
    )

    for target, label, rules in checks:
        if target not in pr_vars:
            continue

        flag_col = target + '_eraqc'
        if flag_col not in df.columns:
            # guarantee the flag column exists so the report below cannot raise
            # KeyError when no rule applies to this variable
            df[flag_col] = np.nan

        for shorter, longer, flag in rules:
            if shorter in pr_vars and longer in pr_vars:
                df.loc[df[shorter] > df[longer], flag_col] = flag

        print('Precip {} eraqc flags (any other value than nan is an active flag!): {}'.format(label, df[flag_col].unique())) # testing

    return df

In [3]:
# load the dummy precipitation fixture used to exercise the check
test_df = pd.read_csv('dummy_pr_data.csv')

# give every data column an empty (NaN) eraqc flag column;
# "note" and "desired_behavior" only annotate the test cases, so they get none
data_columns = [
    name for name in test_df.columns
    if name not in ("note", "desired_behavior")
]
for var in data_columns:
    test_df[var+'_eraqc'] = np.nan

In [4]:
# run the precip logic check on the dummy data; the function writes its flags into
# test_df and returns that frame, so the flagged table is shown as the cell output
qaqc_precip_logic_accum_amounts(test_df)

['pr_5min', 'pr_1h', 'pr_24h']
Precip 5min eraqc flags (any other value than nan is an active flag!): [15. nan]
Precip 1h eraqc flags (any other value than nan is an active flag!): [16. 15. nan]
Precip 24h eraqc flags (any other value than nan is an active flag!): [nan 14.]


Unnamed: 0,pr_5min,pr_1h,pr_24h,note,desired_behavior,pr_5min_eraqc,pr_1h_eraqc,pr_24h_eraqc
0,15.0,10.0,20,5min larger than 1h,flag 5min and 1h,15.0,16.0,
1,20.0,10.0,15,5min larger than 24h,flag all,15.0,16.0,14.0
2,1.0,0.5,2,1h smaller than 5min,flag 5min and 1h,15.0,16.0,
3,2.0,15.0,3,1h larger than 24h,flag 1h and 24h,,15.0,14.0
4,10.0,15.0,0,24h smaller than 5min,flag all,15.0,15.0,14.0
5,1.0,2.0,1,24h smaller than 1h,flag 1h and 24h,,15.0,14.0
6,1.0,5.0,10,no flags,no flags,,,
7,,15.0,10,flag with nan,flag 1h and 24h,,15.0,14.0
