# Summarize QA/QC Results 

Create summary tables from QAQC-PFRA notebook scraps in one or more folders on Amazon Web Services' S3.

### _Development Notebook_

### Import libraries

In [1]:
import sys;sys.path.append('../')
import gdal
from hecrasio.core import *
from hecrasio.qaqc import *
from hecrasio.s3tools import *
import scrapbook as sb
import pandas as pd
import shutil

### Set pandas display options

In [2]:
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 1000)

### Identify notebooks on S3

In [3]:
# Either of these will work

# pull_scraps(single_folder = "s3://azavea/jwx_test/nbs/nbs_all/")

books = pull_scraps(multi_folder = 'yes', bucket = 'azavea', prefix = 'jwx_test/nbs/nb0', name_selector = 'DC')

### Create QA/QC results table

In [4]:
results = make_qaqc_table(books)
results.head(5)

Unnamed: 0,1D Cores,1D Methodology,1D2D Flow Tolerance,1D2D MaxIter,1D2D Minimum Flow Tolerance,1D2D WS Tolerance,2D Boundary Condition Ramp Up Fraction,2D Boundary Condition Volume Check,2D Cores,2D Coriolis,2D Eddy Viscosity Transverse Mixing Coefficient,2D Equation Set,2D Initial Conditions Ramp Up Time (hrs),2D Latitude for Coriolis,2D Maximum Iterations,2D Names,2D Number of Time Slices,2D Only,2D Theta,2D Theta Warmup,2D Volume Tolerance,2D Water Surface Tolerance,Base Output Interval,Computation Time Courant Method,Computation Time DSS,Computation Time Step Base,Computation Time Step Count To Double,Computation Time Step Max Courant,Computation Time Step Max Doubling,Computation Time Step Max Halving,Computation Time Step Min Courant,Computation Time Total,Flow Filename,Flow Title,Geometry Filename,Geometry Title,HDF Chunk Size,HDF Compression,HDF Fixed Rows,HDF Flush Buffer,HDF Spatial Parts,HDF Use Max Rows,HDF Write Face Node Velocities,HDF Write Time Slices,HDF Write Warmup,Maximum WSEL Error,Pardiso Solver,Plan Filename,Plan Name,Plan ShortID,Plan Title,Run Time Window,Simulation End Time,Simulation Start Time,Solution,Time Solution Went Unstable,Time Stamp Solution Went Unstable,Time Window,Vol Accounting,Vol Accounting 2D Bdry,Vol Accounting Cell End,Vol Accounting Cell Start,Vol Accounting DS Outflow,Vol Accounting Error,Vol Accounting Error Percentage,Vol Accounting Groundwater,"Vol Accounting LS, Pump Div",Vol Accounting Lat Hydro,Vol Accounting US Inflow
DC_F01_NBR_E0006,0,Finite Difference,0.1,0,1,0.01,[0.1000000015],[False],0,False,[3.402823466e+38],[Dynamic Wave],[3.402823466e+38],[3.402823466e+38],[20],[D01],[1],True,[1.0],[1.0],[0.0099999998],[0.0099999998],15MIN,Representative Length/Velocity,00:00:01,30SEC,2,2,2,1,0.5,11:02:30,DC_F01_NBR.u01,E0006,DC_F01_NBR.g01,D01,1,1,1,False,1,0,False,False,False,0,False,DC_F01_NBR.p01,test_scaling,test_scaling,test_scaling,12AUG2019 20:46:51 to 13AUG2019 07:49:19,05May2000 10:00:0,01May2000 10:00:0,Unsteady Finished Successfully,00:00:00,Not Applicable,01May2000 10:00:00 to 05May2000 10:00:0,Volume Accounting in Acre Feet,0,99545.4,84515.9,0,73.0143,0.00762016,0,0,0,0
DC_F01_NBR_E0005,0,Finite Difference,0.1,0,1,0.01,[0.1000000015],[False],0,False,[3.402823466e+38],[Dynamic Wave],[3.402823466e+38],[3.402823466e+38],[20],[D01],[1],True,[1.0],[1.0],[0.0099999998],[0.0099999998],15MIN,Representative Length/Velocity,00:00:00,30SEC,2,2,2,1,0.5,11:28:49,DC_F01_NBR.u01,E0005,DC_F01_NBR.g01,D01,1,1,1,False,1,0,False,False,False,0,False,DC_F01_NBR.p01,test_scaling,test_scaling,test_scaling,12AUG2019 20:46:47 to 13AUG2019 08:15:35,05May2000 10:00:0,01May2000 10:00:0,Unsteady Finished Successfully,00:00:00,Not Applicable,01May2000 10:00:00 to 05May2000 10:00:0,Volume Accounting in Acre Feet,0,102312.0,84515.9,0,-43.0278,0.00427186,0,0,0,0


### Create unique values summary table
**Not implimented:** If length of unque value list is equal to a single value, ignore attribute and return that all values are the same.

In [11]:
relevant_columns = ['Base Output Interval',
                    '2D Equation Set',
                    '2D Volume Tolerance',
                    '2D Latitude for Coriolis',
                    '2D Water Surface Tolerance',
                    'Solution',
                    'Vol Accounting Error']

unique_df = identify_unique_values(results, relevant_columns)

unique_df.head(10)

Unnamed: 0_level_0,Unique_Values
Result_Attribute,Unnamed: 1_level_1
Base Output Interval,[15MIN]
2D Equation Set,[Dynamic Wave]
2D Volume Tolerance,[0.0099999998]
2D Latitude for Coriolis,[3.402823466e+38]
2D Water Surface Tolerance,[0.0099999998]
Solution,[Unsteady Finished Successfully]
Vol Accounting Error,"[-43.0277557373, 73.0143432617]"


### Create threshold summary table

In [66]:
def fancy_report(nbs:list, values:list) -> None:
    print("{0: <20} {1}".format('Notebook', 'Value'))
    print("-"*79)
    for i in range(len(nbs)):
        print("{0: <20} {1}".format(nbs[i], values[i]))

def report_header(variable:str):
    print("\nNow evaluating {}...\n".format(variable))
    print("The following notebooks have alarming values for this attribute\n")

In [67]:
for i in unique_df.index:
    if i == 'Vol Accounting Error':
        report_header(i)
        nbs = results[results[i] > 0][i].index
        values = results[results[i] > 0][i].values
        fancy_report(nbs, values)
        print("-"*79)
    elif i == 'Solution':
        report_header(i)
        nbs = results[results[i] != 'Lizard Thicket'][i].index
        values = results[results[i] != 'Lizard Thicket'][i].values
        fancy_report(nbs, values)
        print("-"*79)


Now evaluating Solution...

The following notebooks have alarming values for this attribute

Notebook             Value
-------------------------------------------------------------------------------
DC_F01_NBR_E0006     Unsteady Finished Successfully
DC_F01_NBR_E0005     Unsteady Finished Successfully
-------------------------------------------------------------------------------

Now evaluating Vol Accounting Error...

The following notebooks have alarming values for this attribute

Notebook             Value
-------------------------------------------------------------------------------
DC_F01_NBR_E0006     73.0143432617
-------------------------------------------------------------------------------


In [50]:
results[results['Vol Accounting Error'] > -400]['Vol Accounting Error'].index

Index(['DC_F01_NBR_E0006', 'DC_F01_NBR_E0005'], dtype='object')

In [24]:
results[results['Vol Accounting Error'] > 0]['Vol Accounting Error'].values[0]

73.0143432617

In [17]:
# Example warnings
for i in df_w_uniques.index:
    values = df_w_uniques.loc[i][0]
    if i == '1D Cores':
        validate_by_threshold(df_w_uniques, i, values, 0, results_table)
    elif i == 'Vol Accounting Error':
        validate_by_threshold(df_w_uniques, i, values, 30, results_table)
    elif i == 'Vol Accounting Error Percentage':
        validate_by_threshold(df_w_uniques, i, values, 0.001, results_table)

df_w_uniques.head(5)

Unnamed: 0_level_0,Unique_Values,Warnings,Offending_Nbs
Result_Attribute,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Computation Time Step Count To Double,[2],,
Computation Time Courant Method,[Representative Length/Velocity],,
2D Eddy Viscosity Transverse Mixing Coefficient,[3.402823466e+38],,
2D Coriolis,[False],,
Vol Accounting Cell Start,[84515.9453125],,


### Possible evaluations
- Check output interval is the desired amount
- Check if solution populated with anything other than 'Unstead Finished Successfully'
- Check flow, geometry, and plan filenames have the same base

In [None]:
### Checks strings as the same

In [29]:
def validate_by_equivalency(pd_df:pd.DataFrame, attr:str, values:list, threshold:any, results_df: pd.DataFrame):
    pd_df.loc[attr]['Warnings'] = 'WARNING' if any([value > threshold for value in values]) else 'PASS'
    pd_df.loc[attr]['Offending_Nbs'] = [results_df.index[i] for i, value in enumerate(list(results_df[attr])) if value > threshold]

In [31]:
def validate_by_text(pd_df:pd.DataFrame, attr:str, values:list, regex: str, results_df: pd.DataFrame):
    pd_df.loc[attr]['Warnings'] = 'WARNING' if any([value != regex for value in values]) else 'PASS'
    pd_df.loc[attr]['Offending_Nbs'] = [results_df.index[i] for i, value in enumerate(list(results_df[attr])) if value != regex]

In [None]:
# Basic logic to check equivalency
interval_step = 15
interval_units = 'MIN'
intervals = [interval[:-3] for interval in test_df.loc['Base Output Interval'][0]]
units = [interval[-3:] for interval in test_df.loc['Base Output Interval'][0]]
'WARNING' if any([interval != str(interval_step) for interval in intervals]) else 'PASS'
'WARNING' if any([unit != str(interval_units) for unit in units]) else 'PASS'

# This needs to include something to evaluate these together...

In [None]:
# Impliment validate by text solution
for i in test_df.index:
    values = test_df.loc[i][0]
    if i == 'Solution':
        validate_by_text(test_df, i, values, 'Unsteady Finished Successfully', results_table)

In [33]:
# A number of these evaluations need to be done on the results table

# Basic logic for checking flow, geometry, and plan names match
for i in results_table.index:
    ff = results_table.loc[i]['Flow Filename'].split('.')[0]
    gf = results_table.loc[i]['Geometry Filename'].split('.')[0]
    pf = results_table.loc[i]['Plan Filename'].split('.')[0]
    print('WARNING') if any([name != ff for name in [ff, gf, pf]]) else print('PASS')

PASS
PASS


In [None]:
# Could develop results table and the summary evaluator...this ends up being twice the validation


# END