# Set final variable order and names


Set final order and names of variables, for consistency.

## Environment set-up

In [1]:
import datetime
import boto3
import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from io import BytesIO, StringIO

# Silence warnings
import warnings
from shapely.errors import ShapelyDeprecationWarning

warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings(
    "ignore", category=ShapelyDeprecationWarning
)  # Warning is raised when creating Point object from coords. Can't figure out why.

plt.rcParams["figure.dpi"] = 300

In [2]:
# AWS credentials
s3 = boto3.resource("s3")
s3_cl = boto3.client("s3")

## AWS buckets
bucket = "wecc-historical-wx"
cleandir = '2_clean_wx/'
qaqcdir = "3_qaqc_wx/"
mergedir = "4_merge_wx/"

## Step 1: Assess differences in order and names

Load in datasets from VALLEYWATER, ASOSAWOS, and MARITIME. 

In [3]:
url = "s3://wecc-historical-wx/3_qaqc_wx/ASOSAWOS/ASOSAWOS_69007093217.zarr"
ds = xr.open_zarr(url)
df_a = ds.to_dataframe()

In [6]:
url = "s3://wecc-historical-wx/3_qaqc_wx/MARITIME/MARITIME_ANVC1.zarr"
ds = xr.open_zarr(url)
df_m = ds.to_dataframe()

In [7]:
url = "s3://wecc-historical-wx/3_qaqc_wx/VALLEYWATER/VALLEYWATER_6001.zarr"
ds = xr.open_zarr(url)
df_v = ds.to_dataframe()

In [12]:
print(df_a.columns)
print(df_m.columns)
print(df_v.columns)

Index(['anemometer_height_m', 'elevation', 'elevation_eraqc', 'lat', 'lon',
       'pr', 'pr_depth_qc', 'pr_duration', 'pr_eraqc', 'pr_qc', 'ps_altimeter',
       'ps_altimeter_eraqc', 'ps_altimeter_qc', 'ps_qc', 'psl', 'psl_eraqc',
       'psl_qc', 'qaqc_process', 'sfcWind', 'sfcWind_dir', 'sfcWind_dir_eraqc',
       'sfcWind_dir_qc', 'sfcWind_eraqc', 'sfcWind_method', 'sfcWind_qc',
       'tas', 'tas_eraqc', 'tas_qc', 'tdps', 'tdps_eraqc', 'tdps_qc',
       'thermometer_height_m'],
      dtype='object')
Index(['anemometer_height_m', 'elevation', 'elevation_eraqc', 'lat', 'lon',
       'ps', 'ps_eraqc', 'sfcWind', 'sfcWind_dir', 'sfcWind_dir_eraqc',
       'sfcWind_eraqc', 'tas', 'tas_eraqc', 'thermometer_height_m'],
      dtype='object')
Index(['anemometer_height_m', 'elevation', 'elevation_eraqc', 'lat', 'lon',
       'pr_15min', 'pr_15min_eraqc', 'raw_qc', 'thermometer_height_m'],
      dtype='object')


In [None]:
def update_order_attrs(df) -> pd.DataFrame:
    """


    Rules
    ------
        1.)
    Parameters
    ------
        df: pd.DataFrame

    Returns
    -------
        if success:

        if failure:
            None
    Notes
    -------

    """
    ##### Reorder variables
    desired_order = [
        "ps",
        "tas",
        "tdps",
        "pr",
        "hurs",
        "rsds",
        "sfcWind",
        "sfcWind_dir",
        "pvp",
        "svp",
        ]

    new_order = [
        i for keyword in desired_order for i in df_a.columns if i.startswith(keyword)
    ]

    # extract qaqc variables, to be sent to the back

    qaqc_vars = [i for i in new_order if 'qc' in i ]

    nonqaqc_vars = [i for i in new_order if i not in qaqc_vars]

    rest_of_vars = [
        i for i in list(df.columns) if i not in desired_order
    ]  # Retain rest of variables at the bottom

    final_order = nonqaqc_vars + qaqc_vars + rest_of_vars

    # remove 'method' and 'duration' vars

    final_order = [
        i for i in final_order if not any(sub in i for sub in ["duration", "method"])
    ]

    df = df[final_order]

    ##### Update attributes

    


    return df

In [50]:
df_test = update_order_attrs(df_a)
df_test.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,ps_altimeter,psl,tas,tdps,pr,sfcWind,sfcWind_dir,sfcWind_dir,ps_altimeter_eraqc,ps_altimeter_qc,...,qaqc_process,sfcWind_dir_eraqc,sfcWind_dir_qc,sfcWind_eraqc,sfcWind_qc,tas_eraqc,tas_qc,tdps_eraqc,tdps_qc,thermometer_height_m
station,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ASOSAWOS_69007093217,1991-01-02 12:00:00,19,19,19,19,0.0,4.1,90.0,90.0,28.0,1,...,V020,,1,,1,,1,28.0,1,
ASOSAWOS_69007093217,1991-01-02 13:00:00,19,19,19,19,,3.6,90.0,90.0,28.0,1,...,V020,,1,,1,,1,28.0,1,


In [51]:
df_a

Unnamed: 0_level_0,Unnamed: 1_level_0,anemometer_height_m,elevation,elevation_eraqc,lat,lon,pr,pr_depth_qc,pr_duration,pr_eraqc,pr_qc,...,sfcWind_eraqc,sfcWind_method,sfcWind_qc,tas,tas_eraqc,tas_qc,tdps,tdps_eraqc,tdps_qc,thermometer_height_m
station,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ASOSAWOS_69007093217,1991-01-02 12:00:00,,41.0,,36.683,-121.767,0.0,9.0,NaT,,1,...,,N,1,19,,1,19,28.0,1,
ASOSAWOS_69007093217,1991-01-02 13:00:00,,41.0,,36.683,-121.767,,,NaT,,,...,,N,1,19,,1,19,28.0,1,
ASOSAWOS_69007093217,1991-01-02 14:00:00,,41.0,,36.683,-121.767,,,NaT,,,...,,N,1,19,,1,19,28.0,1,
ASOSAWOS_69007093217,1991-01-02 15:00:00,,41.0,,36.683,-121.767,0.0,9.0,NaT,,1,...,,N,1,19,,1,19,28.0,1,
ASOSAWOS_69007093217,1991-01-02 16:00:00,,41.0,,36.683,-121.767,,,NaT,,,...,,N,1,19,,1,19,28.0,1,
ASOSAWOS_69007093217,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ASOSAWOS_69007093217,1993-08-31 19:00:00,,41.0,,36.683,-121.767,,,NaT,,,...,,N,1,19,,1,19,28.0,1,
ASOSAWOS_69007093217,1993-08-31 20:00:00,,41.0,,36.683,-121.767,,,NaT,,,...,,N,1,19,,1,19,28.0,1,
ASOSAWOS_69007093217,1993-08-31 21:00:00,,41.0,,36.683,-121.767,,,NaT,,,...,,N,1,19,,1,19,28.0,1,
ASOSAWOS_69007093217,1993-08-31 22:00:00,,41.0,,36.683,-121.767,,,NaT,,,...,,N,1,19,,1,19,28.0,1,
