In [19]:
# import libraries
import xarray as xr
import numpy as np
import pandas as pd
import os
import sys
import tempfile

import math

from qaqc_eval_utils import *

sys.path.append(os.path.expanduser('../'))
# from qaqc_plot import flagged_timeseries_plot
from QAQC_pipeline import qaqc_ds_to_df


global local_tmp_dir, local_perm_dir
local_tmp_dir = "./tmp"
local_perm_dir = "../Train_Files"

for dir in [local_tmp_dir, local_perm_dir]:
    if not os.path.exists(dir):
        os.mkdir(dir)

In [2]:
# read in stations
train_stns = pd.read_csv('../qaqc_training_station_list_events.csv')
train_stns.head()

Unnamed: 0,network,era-id,elevation,latitude,longitude,start_date,end_date,event_type,notes
0,ASOSAWOS,ASOSAWOS_72281023199,-14.0208,32.832,-115.664,1984-04-13 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,
1,ASOSAWOS,ASOSAWOS_72288023152,222.8088,34.2,-118.365,1943-06-01 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,
2,ASOSAWOS,ASOSAWOS_72288623130,239.268,34.212,-118.491,1942-12-11 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,
3,ASOSAWOS,ASOSAWOS_72290023188,4.572,32.734,-117.183,1942-01-01 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,
4,ASOSAWOS,ASOSAWOS_72290693112,4.2672,32.692,-117.21,1945-04-01 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,


In [15]:
def download_nc_from_aws(station, save=False):

    # Temp file for downloading from s3
    tmpFileName = tempfile.NamedTemporaryFile(dir = local_tmp_dir, 
                                              prefix = "", 
                                              suffix = ".nc",
                                              delete = True)

    # Local file name to read/write from
    localFileName = f"{local_perm_dir}/{station}.nc"

    # s3 details
    print('Retrieving data for station...')
    s3 = s3fs.S3FileSystem(anon=False)
    network = station.split('_')[0]
    s3_url = 's3://wecc-historical-wx/3_qaqc_wx_dev/{}/{}.nc'.format(network, station)

    # Read file
    # If file is already downloaded locally, read it
    if os.path.exists(localFileName):
        ds = xr.open_dataset(localFileName, engine='h5netcdf').load()
    # If not, download from s3 bucket
    else:
#        try:
            s3_file_obj = s3.get(s3_url, tmpFileName.name)
            ds = xr.open_dataset(tmpFileName.name, engine='h5netcdf').load()

#        except:
#            raise ValueError(f'Station {station} not found in bucket -- please check if station completed QA/QC.')

    # If we want to save file to disk, copy the temp file to the storage training folder
    if save and not os.path.exists(localFileName):
        os.system(f"cp {tmpFileName.name} {localFileName}")    

    # Download temp file to avoid disk filling
    # os.system(f"rm {tmpFileName.name}")
    tmpFileName.close()
    
    return ds

# pulling from PR 151 until it gets merged

In [35]:
def _all_nan(l):
    return all(math.isnan(x) for x in l)

def flagged_station_search(stn_list, event_start_date, event_end_date, flag_info=False, save_all_stns=False):
    '''
    Helper function that finds flagged stations during an event of interest. 
    Designed to only be run sporadically, as it will take some time to run through 1000+ stations.

    Similar to the station list search functions. 
    '''

    active_flag_stns = []
    
    # warning about downloading all of these files
    if save_all_stns:
        print('Warning: All stations will be downloaded to local memory. Depending on size of station list, this may be 1000+ stations and GB-TB of memory!')

    # read file from AWS
    for stn_id in stn_list['era-id']:
        print(f'Checking flags in {stn_id}...')
        ds_to_check = download_nc_from_aws(stn_id, save=save_all_stns)

        # subset by event dates with buffer
        ds_to_check_sub = ds_to_check.sel(time=slice(event_start_date, event_end_date))

        # if no date coverage
        if len(ds_to_check_sub) == 0:
            continue

        # check if flags are placed, search through any _eraqc var
        vars_to_check = [i for i in ds_to_check_sub.data_vars if '_eraqc' in i]
        flag_list = []

        for v in vars_to_check:
            for item in np.unique(ds_to_check_sub[v]):
                flag_list.append(item)

        print(_all_nan(flag_list))
        # has_numeric = all(isinstance(item, (np.nan)) for item in flag_list) # list of all values in _eraqc vars, including flags and nan
        # print(has_numeric)
        if _all_nan(flag_list): # flag is not present
            continue

        else: # flag is present
            active_flag_stns.append(stn_id)
            if flag_info:
                print(f'{stn_id} has flags placed during event ({event_start_date}-{event_end_date}): {flag_list}')
                    
    return active_flag_stns

In [27]:
test_train = train_stns[:5]
test_train

Unnamed: 0,network,era-id,elevation,latitude,longitude,start_date,end_date,event_type,notes
0,ASOSAWOS,ASOSAWOS_72281023199,-14.0208,32.832,-115.664,1984-04-13 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,
1,ASOSAWOS,ASOSAWOS_72288023152,222.8088,34.2,-118.365,1943-06-01 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,
2,ASOSAWOS,ASOSAWOS_72288623130,239.268,34.212,-118.491,1942-12-11 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,
3,ASOSAWOS,ASOSAWOS_72290023188,4.572,32.734,-117.183,1942-01-01 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,
4,ASOSAWOS,ASOSAWOS_72290693112,4.2672,32.692,-117.21,1945-04-01 00:00:00+00:00,2022-12-31 00:00:00+00:00,all,


In [36]:
event_start_date = "2007-10-20"
event_end_date = "2007-10-24"

flagged_station_search(test_train, event_start_date, event_end_date, flag_info=True)

Checking flags in ASOSAWOS_72281023199...
Retrieving data for station...


  base = data.astype(np.int64)
  data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")


True
ASOSAWOS_72281023199 has flags placed during event (2007-10-20-2007-10-24): [nan, nan, nan, nan, nan, nan, nan, nan, nan]
Checking flags in ASOSAWOS_72288023152...
Retrieving data for station...


  base = data.astype(np.int64)
  data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")


False
Checking flags in ASOSAWOS_72288623130...
Retrieving data for station...


  base = data.astype(np.int64)
  data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")


False
Checking flags in ASOSAWOS_72290023188...
Retrieving data for station...


  base = data.astype(np.int64)
  data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")


False
Checking flags in ASOSAWOS_72290693112...
Retrieving data for station...


  base = data.astype(np.int64)
  data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")


False


['ASOSAWOS_72281023199']