# Unusual Repeated Streak (QAQC) testing

In [2]:
# Import libraries
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib as mpl
import glob

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
import os
import tempfile
import argparse 

# Import all qaqc script functions
try:
    from qaqc_plot import *
    from qaqc_utils import *
    from qaqc_wholestation import *
    from qaqc_logic_checks import *
    from qaqc_buoy_check import *
    from qaqc_frequent import *
    from qaqc_unusual_gaps import *
    from qaqc_unusual_large_jumps import *
    from qaqc_climatological_outlier import *
    from qaqc_unusual_streaks import *
except Exception as e:
    print("Error importing qaqc script: {}".format(e))

# Import qaqc stage calc functions
try:
    from QAQC_pipeline import *
except Exception as e:
    # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still
    # propagate, and report the underlying cause like the import guard above.
    print("Error importing QAQC_pipeline.py: {}".format(e))

In [42]:
def qaqc_ds_to_df(ds, verbose=True, errors=None, end_api=None):
    """Flatten a station dataset into a DataFrame and run the missing-value check.

    Steps performed:
      1. For every data variable that is neither excluded nor a raw QC
         variable, add a NaN-filled ``<var>_eraqc`` companion variable.
      2. Convert the dataset to a DataFrame, attach the anemometer and
         thermometer heights as columns, drop duplicated index entries and
         sort the index.
      3. Copy the first index level into a ``station`` column, drop that level
         and reset the remaining index into columns.
      4. Run ``qaqc_missing_vals`` on a copy of the frame.

    Parameters
    ----------
    ds : xarray.Dataset
        Station dataset. Must provide ``time``, ``anemometer_height_m`` and
        ``thermometer_height_m``; the first level of the ``to_dataframe()``
        index is taken to be the station.
    verbose : bool, default True
        Forwarded to the QA/QC helpers; also controls the pass/fail messages
        printed here.
    errors : optional
        Error log forwarded to ``print_qaqc_failed`` when the missing-value
        check fails. When left as ``None`` the failure is only printed.
        NOTE(review): the expected structure is defined by
        ``print_qaqc_failed`` (not visible in this notebook) -- confirm.
    end_api : optional
        Forwarded unchanged to ``print_qaqc_failed`` together with ``errors``.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``qaqc_missing_vals`` on success; otherwise the
        copy that was handed to ``qaqc_missing_vals``.
    """
    ## Add qc_flag variable for all variables, including elevation;
    ## defaulting to nan for fill value that will be replaced with qc flag
    exclude_qaqc = ["time", "station", "lat", "lon",
                    "qaqc_process", "sfcWind_method"] # lat, lon have different qc check

    # Raw qc variables (vary station to station): kept for comparison, never
    # given an _eraqc companion. A single test avoids listing a name twice.
    raw_qc_vars = [var for var in ds.data_vars
                   if 'q_code' in var or '_qc' in var]

    # Resolve the targets up front so the loop does not depend on `ds` being
    # rebound while it is iterated.
    qc_targets = [var for var in ds.data_vars
                  if var not in exclude_qaqc and var not in raw_qc_vars]
    for var in qc_targets:
        # adds new variable in shape of original variable with designated nan fill value
        ds = ds.assign({var + "_eraqc": xr.ones_like(ds[var])*np.nan})

    df = ds.to_dataframe()
    df['anemometer_height_m'] = np.ones(ds['time'].shape)*ds.anemometer_height_m.squeeze()
    df['thermometer_height_m'] = np.ones(ds['time'].shape)*ds.thermometer_height_m.squeeze()

    # De-duplicate time axis
    df = df[~df.index.duplicated()].sort_index()

    # Keep the station level of the index as a regular column
    station_level = df.index.get_level_values(0)
    df['station'] = station_level

    # Single station identifier, used in failure messages
    station = station_level.unique().values[0]

    # Convert time/station index to columns and reset index
    df = df.droplevel(0).reset_index()

    ##########################################################
    ## QAQC Functions
    # Order of operations
    # Part 1a: Whole station checks - if failure, entire station does not proceed through QA/QC
    # Part 1b: Whole station checks - if failure, entire station does proceed through QA/QC
    # Part 2: Logic checks
    # Part 3: Distribution & time series checks

    #=========================================================
    ## Part 1a: Whole station checks - if failure, entire station does not proceed through QA/QC

    #---------------------------------------------------------
    ## Missing values -- does not proceed through qaqc if failure
    stn_to_qaqc = df.copy()  # defined before the check so there is always a frame to return
    checked = qaqc_missing_vals(stn_to_qaqc, verbose=verbose)
    if checked is None:
        # The original code read the undefined names `errors`/`end_api` here
        # and raised UnboundLocalError; only call the logger when a log exists.
        if errors is not None:
            # NOTE(review): the helper's return value is not propagated, so the
            # caller sees the update only if the helper mutates `errors` in
            # place -- confirm against print_qaqc_failed.
            print_qaqc_failed(errors, station, end_api,
                              message="has an unchecked missing value",
                              test="qaqc_missing_vals",
                              verbose=verbose)
        elif verbose:
            print("{} has an unchecked missing value (qaqc_missing_vals)".format(station))
        return stn_to_qaqc

    if verbose:
        print('pass qaqc_missing_vals') # testing
    return checked

In [43]:
# Network identifier handed to get_file_paths below
network = "VCAPCD"

# Unpack the four values returned by get_file_paths for this network.
# NOTE(review): the names suggest raw / cleaned / QAQC / merged directories;
# the helper is not defined in this notebook -- confirm.
rawdir, cleandir, qaqcdir, mergedir = get_file_paths(network)
# Disabled (commented-out) whole-station pipeline call, kept for reference
# whole_station_qaqc(network, cleandir, qaqcdir, rad_scheme="remove_zeros", verbose=True, local=True)

In [57]:
# Open the station file from the relative Train_Files directory; the trailing
# `.isel(station=0)` selection is commented out, so the dataset is used as-is.
ds = xr.open_dataset('Train_Files/LOXWFO_OX1MB.nc')#.isel(station=0)
# Flatten to a DataFrame and run the missing-value check (qaqc_ds_to_df, defined in this notebook)
df = qaqc_ds_to_df(ds)
# df

Updating missing values for: ps
Updating missing values for: tas
Updating missing values for: pr
Updating missing values for: hurs
Updating missing values for: sfcWind
Updating missing values for: sfcWind_dir
Updating missing values for: tdps_derived
pass qaqc_missing_vals


In [70]:
# Run the unusual-repeated-streaks check on the prepared frame; the result is
# bound to a separate name so `df` is not rebound by this cell.
new_df = qaqc_unusual_repeated_streaks(df)

tas
tdps_derived
ps
sfcWind
Running qaqc_unusual_repeated_streaks on ['tas', 'tdps_derived', 'ps', 'sfcWind']


  ax.set_ylim(miny,maxy)


In [69]:
# Display the value infere_res_var returns for the "ps" column.
# NOTE(review): presumably the inferred measurement resolution of that
# variable; the helper is not defined in this notebook -- confirm.
infere_res_var(df, var="ps")

ps


0.5