# Debugging MERGE_pipeline.py 
This notebook breaks the MERGE_pipeline.py script down step by step so that each stage can be run and debugged for a single station.

In [None]:
from datetime import datetime, timedelta, timezone
import time
import inspect
from typing import Dict

import pandas as pd
import xarray as xr
import logging

from merge_log_config import setup_logger, upload_log_to_s3
from merge_hourly_standardization import merge_hourly_standardization
from merge_derive_missing import merge_derive_missing_vars
from merge_clean_vars import merge_reorder_vars, merge_drop_vars
from merge_eraqc_counts import eraqc_counts_native_timestep

from MERGE_pipeline import read_station_metadata, validate_station, read_zarr_dataset, get_var_attrs, convert_xr_to_df, convert_df_to_xr, write_zarr_to_s3

In [None]:
# Debug-run settings: the station ID to process and whether logging is verbose.
station, verbose = "CDEC_BLB", True

In [None]:
# Step-by-step run of the merge pipeline for the single `station` chosen in an
# earlier cell. Intermediate DataFrame snapshots (df0, df1, df2) are kept so
# the effect of each stage can be compared in later cells.

# S3 locations: the bucket, the station-list CSV inside it, the directory the
# QA/QC'd input is read from, and the directory merged output would go to.
bucket_name = "wecc-historical-wx"
stations_csv_path = f"s3://{bucket_name}/2_clean_wx/temp_clean_all_station_list.csv"
qaqc_dir = "3_qaqc_wx"
merge_dir = "4_merge_wx"  # only referenced by the disabled write at the bottom of this cell

# Record the start time (not read again anywhere in this cell)
start_time = time.time()

## ======== SETUP ========

# Set up the logger for this station; `log_filepath` is returned alongside it
# but is not used further in this cell
logger, log_filepath = setup_logger(station, verbose=verbose)

# Load the station metadata table from the station-list CSV
stations_df = read_station_metadata(stations_csv_path, logger)

# Validate the station against the metadata and get its network name
network_name = validate_station(station, stations_df, logger)

## ======== READ IN AND REFORMAT DATA ========

# Load the station's Zarr dataset from the QA/QC directory on S3
ds = read_zarr_dataset(bucket_name, qaqc_dir, network_name, station, logger)

# Get variable attributes from the dataset; these are threaded through and
# updated by the merge steps below
var_attrs = get_var_attrs(ds, network_name, logger)

# Convert the dataset to a DataFrame for the merge steps
df = convert_xr_to_df(ds, logger)

# ======== MERGE FUNCTIONS ========

# Part 1: Construct and export table of raw QAQC counts per variable
# For success report
eraqc_counts_native_timestep(df, network_name, station, logger)
# Snapshot 0: data before any variables are derived.
# NOTE(review): the copy is taken after the counts call, so it would include
# any in-place changes that call makes to `df` -- confirm it does not mutate.
df0 = df.copy()
# Part 2: Derive any missing variables
df, var_attrs = merge_derive_missing_vars(df, var_attrs, logger)

# Snapshot 1: data after deriving missing variables, before hourly standardization
df1 = df.copy()

# Part 3: Standardize sub-hourly observations to hourly
df, var_attrs = merge_hourly_standardization(df, var_attrs, logger)

# Part 3b: Construct and export table of raw QAQC counts per variable post-hourly standardization
# For HDP project documentation and final report
# ----- INCOMPLETE -----

# Part 4: Drops raw _qc variables (DECISION TO MAKE) or provide code to filter
# Snapshot 2: data after hourly standardization, before variables are dropped
df2 = df.copy()
df, var_attrs = merge_drop_vars(df, var_attrs, logger)

# Part 5: Re-orders variables into final preferred order
df = merge_reorder_vars(df, logger)

# ======== CLEANUP & UPLOAD DATA TO S3 ========

# Convert the cleaned DataFrame to an xarray.Dataset and assign global + variable-level metadata
# (global attributes come from the original dataset's `ds.attrs`)
ds_merged = convert_df_to_xr(df, ds.attrs, var_attrs, logger)

# The S3 write is left disabled in this notebook -- presumably so that a debug
# run does not write output to the bucket; re-enable to export.
# # Write the xarray Dataset as a Zarr file to the specified S3 path
# write_zarr_to_s3(
#     ds_merged, bucket_name, merge_dir, network_name, station, logger
# )

In [None]:
# Display the merged dataset returned by convert_df_to_xr for visual inspection
ds_merged

In [None]:
# Show the column index of each stage snapshot, one per line, in pipeline
# order: df0 (before deriving variables), df1 (after deriving variables),
# df2 (after hourly standardization, before dropping variables).
print(*(frame.columns for frame in (df0, df1, df2)), sep="\n")

In [None]:
# Display the variable attributes as they stand after all merge steps have run
var_attrs