# Merge pipeline debugging 

In [None]:
import xarray as xr 
import pandas as pd
import numpy as np
import logging

from merge_hourly_standardization import merge_hourly_standardization
from merge_derive_missing import merge_derive_missing_vars
from merge_clean_vars import merge_reorder_vars, merge_drop_vars

### 1. Read in a sample zarr and set up the input to the function
1. Make a simple logger that just prints to the console (required input to function)
2. Read in zarr as xr.Dataset 
3. Convert xr.Dataset --> pd.DataFrame (required input to function)
4. Change MultiIndex DataFrame to a flat (single-level) DataFrame, with `time` and `station` as normal columns 
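5. Pass the DataFrame, variable attributes, and logger to `merge_derive_missing_vars`, then to `merge_hourly_standardization`
6. Drop and re-order variables with `merge_drop_vars` and `merge_reorder_vars`
7. Convert the final DataFrame back to an xr.Dataset and re-attach the variable attributes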

In [None]:
# Create a simple logger that just prints to the console
# (passed to the derive / standardization functions below)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Read in zarr as xarray object
s3_path = "s3://wecc-historical-wx/3_qaqc_wx/ASOSAWOS/ASOSAWOS_69007093217.zarr/"
ds = xr.open_zarr(s3_path)

# Attributes from each variable, keyed by variable name
var_attrs = {var: ds[var].attrs for var in ds.data_vars}

# Convert to dataframe, then set index MultiIndex --> normal df
# (index levels become regular columns)
df = ds.to_dataframe().reset_index()

# Derive missing vars
# NOTE(review): a copy is passed so that `df` still holds the untouched input
# when it is inspected later in the notebook, in case
# merge_derive_missing_vars modifies its argument in place -- confirm.
df_after_missing_vars, var_attrs = merge_derive_missing_vars(
    df.copy(), var_attrs, logger
)

# Input dataframe to merge_hourly_standardization function
df_after_merge, var_attrs = merge_hourly_standardization(
    df_after_missing_vars, var_attrs, logger
)

# Part 5: Drops raw _qc variables (DECISION TO MAKE) or provide code to filter
df_final, var_attrs = merge_drop_vars(df_after_merge, var_attrs)

# Part 6: Re-orders variables into final preferred order
df_final = merge_reorder_vars(df_final)

# Convert to xarray dataset (note: this rebinds `ds`, replacing the input dataset)
ds = df_final.to_xarray()

# Assign attributes for each variable
for var, attrs in var_attrs.items():
    ds[var] = ds[var].assign_attrs(attrs)

### 2. Observe the original DataFrame

In [None]:
# Preview the first rows of `df`, the flat DataFrame built from the zarr store
df.head()

### 3. Observe the output DataFrames

In [None]:
# First rows of df_after_merge, the DataFrame returned by
# merge_hourly_standardization
df_after_merge.head()

In [None]:
# First rows of df_after_missing_vars, the DataFrame returned by
# merge_derive_missing_vars (the step that runs before hourly standardization)
df_after_missing_vars.head()

In [None]:
# First rows of df_final, the output of merge_drop_vars followed by
# merge_reorder_vars
df_final.head()

In [None]:
# Keys of var_attrs as a plain list
list(var_attrs)

In [None]:
# Column labels of the final DataFrame
df_final.columns