# QAQC order and single function testing

## Import libraries

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib as mpl
import glob

%matplotlib inline
%load_ext autoreload
%autoreload 2

## Import QAQC libraries

In [None]:
import os
import tempfile
import argparse 

# Import all qaqc script functions
try:
    from qaqc_plot import *
    from qaqc_utils import *
    from qaqc_wholestation import *
    from qaqc_logic_checks import *
    from qaqc_buoy_check import *
    from qaqc_frequent import *
    from qaqc_unusual_gaps import *
    from qaqc_unusual_large_jumps import *
    from qaqc_climatological_outlier import *
    from qaqc_unusual_streaks import *
except Exception as e:
    print("Error importing qaqc script: {}".format(e))

# Import qaqc stage calc functions
try:
    from QAQC_pipeline import *
except Exception as e:
    # Best-effort import: report the failure together with its cause and carry
    # on. Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    print("Error importing QAQC_pipeline.py: {}".format(e))

# Make sure the local figure directory exists. exist_ok=True makes this a
# no-op when the directory is already there and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs("./qaqc_figs", exist_ok=True)

## Testing on a single network / station

### Load file and convert to pandas df 

In [None]:
""" Any function from the QAQC libraries can be used here.
    The qaqc pipeline needs the pandas df in the same format it uses itself.
"""

network = "VCAPCD"
rawdir, cleandir, qaqcdir, mergedir = get_file_paths(network)
# Show the four directories, one per line, in the order they were returned
print(rawdir, cleandir, qaqcdir, mergedir, sep="\n")

In [None]:
""" We can test the same way the pipeline does:
    take a network and subsample a single station
"""

files, stations = read_network_files(network, cleandir)
# Draw one entry at random; no random_state is passed, so the chosen station
# can differ between runs.
stations_sample = stations.sample(1)
# Pull the single sampled entry out by position
station = stations_sample.iloc[0]
print(station)

In [None]:
""" We could load the station file from the
    s3 bucket
"""
# NOTE(review): s3fs is not imported explicitly in this notebook; it is
# presumably brought into scope by the qaqc star imports -- confirm.
file_name = cleandir + station + ".nc"
fs = s3fs.S3FileSystem()
aws_url = "s3://wecc-historical-wx/" + file_name

with fs.open(aws_url) as fileObj:
    # .load() pulls the data into memory, so ds remains usable after the
    # remote file object is closed at the end of the with-block.
    ds = xr.open_dataset(fileObj).load()
    # Keep a single entry per timestamp
    ds = ds.drop_duplicates(dim="time")

In [None]:
# Display the dataset loaded from the s3 bucket
ds

In [None]:
""" Alternatively, test on a locally stored station
    file directly (this rebinds `ds`)
"""
# NOTE(review): this file is a LOXWFO station while `network`, `station` and
# `file_name` may still refer to the VCAPCD sample -- confirm they are kept in
# sync before running the whole pipeline.
ds = xr.open_dataset('Train_Files/LOXWFO_OX1MB.nc')
ds

In [None]:
""" Convert the dataset to a pandas df in the
    format needed for the qaqc pipeline
"""
# qaqc_ds_to_df returns four values; attrs and var_attrs are kept alongside
# the df (presumably the dataset-level and per-variable attributes -- confirm).
df, MultiIndex, attrs, var_attrs = qaqc_ds_to_df(ds)
# Preview the first five rows
df.head(5)

### Test QAQC pipeline or single functions

After converting to a df in the same format as the pipeline, we can test either the whole pipeline or single functions.

#### Testing single functions

In [None]:
# Work on a copy so the single-function tests below operate on new_df rather
# than on df itself.
new_df = df.copy()

In [None]:
# Run the world-record check on new_df and keep the returned df
new_df = qaqc_world_record(new_df)

In [None]:
# Run the elevation-range check on the current new_df and keep the returned df
new_df = qaqc_elev_range(new_df)

In [None]:
# Run the unusual repeated streaks check with the plot, local and verbose
# flags switched on.
new_df = qaqc_unusual_repeated_streaks(new_df, plot=True, local=True, verbose=True)

In [None]:
# Run the climatological outlier check with plotting and verbose output on.
# NOTE(review): unlike the streaks check, no `local=` flag is passed here --
# confirm whether that is intentional.
new_df = qaqc_climatological_outlier(new_df, plot=True, verbose=True)

#### Testing the whole pipeline

In [None]:
# Set up error handling.
# errors and end_api are passed on to run_qaqc_pipeline; timestamp is not
# referenced again in the visible cells.
errors, end_api, timestamp = setup_error_handling()

In [None]:
# Run the full pipeline on ds. This rebinds df, attrs and var_attrs that were
# produced earlier by qaqc_ds_to_df.
# NOTE(review): file_name and station are only assigned in the S3-loading
# cells; if ds was loaded from a local file they may be undefined or refer to
# a different station -- confirm before running.
df, attrs, var_attrs = run_qaqc_pipeline(ds, network, file_name, errors, 
                                         station, end_api, rad_scheme="remove_zeros",
                                         verbose=True, local=True)

In [None]:
### Or we can always run it the same way that ALL_NETWORKS_qaqc.py does:
# whole_station_qaqc(network, cleandir, qaqcdir, rad_scheme="remove_zeros", 
#                    verbose=True, local=True)

In [None]:
""" Any function from the QAQC libraries can be used here.
    The qaqc pipeline needs the pandas df in the same format it uses itself.
"""

network = "ASOSAWOS"
rawdir, cleandir, qaqcdir, mergedir = get_file_paths(network)
# Show the four directories, one per line, in the order they were returned
print(rawdir, cleandir, qaqcdir, mergedir, sep="\n")

In [None]:
""" We can test the same way the pipeline does:
    take a network and subsample a single station
"""

files, stations = read_network_files(network, cleandir)
# Draw one entry at random; no random_state is passed, so the chosen station
# can differ between runs.
stations_sample = stations.sample(1)
# Pull the single sampled entry out by position
station = stations_sample.iloc[0]
print(station)

In [None]:
# Show the current working directory (relies on IPython's automagic for %pwd)
pwd

- ASOSAWOS_74718503144
- ASOSAWOS_74917900392 

In [6]:
""" Alternatively, test on a locally stored station
    file directly (this rebinds `ds`)
"""
# The path is relative, so it is resolved against the current working directory
ds = xr.open_dataset('Train_Files/ASOSAWOS_74917900392.nc')
ds

In [None]:
# ds.ps_altimeter.plot()

In [7]:
""" Convert the dataset to a pandas df in the
    format needed for the qaqc pipeline
"""
# qaqc_ds_to_df returns four values; attrs and var_attrs are kept alongside
# the df (presumably the dataset-level and per-variable attributes -- confirm).
df, MultiIndex, attrs, var_attrs = qaqc_ds_to_df(ds)
# Preview the first two rows
df.head(2)

  base = data.astype(np.int64)
  data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")


Unnamed: 0,time,ps,tas,tdps,pr,sfcWind,sfcWind_dir,elevation,qaqc_process,ps_qc,...,tdps_eraqc,pr_eraqc,sfcWind_eraqc,sfcWind_dir_eraqc,elevation_eraqc,ps_altimeter_eraqc,pr_duration_eraqc,anemometer_height_m,thermometer_height_m,station
0,2009-02-01 00:00:00,,294.15,280.15,,4.6,320.0,72.0,V020,9,...,,,,,,,NaT,,,ASOSAWOS_74917900392
1,2009-02-01 00:15:00,,292.15,280.15,,5.7,320.0,72.0,V020,9,...,,,,,,,NaT,,,ASOSAWOS_74917900392


In [None]:
# new_df = qaqc_unusual_large_jumps(df, plot=True, local=True, verbose=True)

In [None]:
# new_df = qaqc_unusual_repeated_streaks(df, plot=True, local=True, verbose=True)

In [None]:
# new_df = qaqc_unusual_large_jumps(new_df, plot=True, local=True, verbose=True)

In [10]:
# Run the unusual gaps check directly on df (not on a previous new_df).
# NOTE(review): this call passes `plots=`, whereas the other checks in this
# notebook are called with `plot=` -- confirm against the qaqc_unusual_gaps
# signature.
new_df = qaqc_unusual_gaps(df, plots=True, verbose=True)

KeyboardInterrupt: 