# Scratch: rebuild model-ready HUC files using UCLA SWE in place of the UA SWE columns

In [1]:
import pandas as pd
import json
from snowML.datapipe.utils import data_utils as du

In [2]:
def drop_swe_columns(df):
    """Return ``df`` without the columns whose name contains "swe".

    The match is a case-insensitive substring test on the column labels,
    so e.g. ``mean_swe``, ``SWE_Post`` and ``mean_swe_lag_7`` are all removed.
    The input frame itself is not modified.
    """
    is_swe_column = df.columns.str.contains('swe', case=False)
    return df.loc[:, ~is_swe_column]

def add_lagged_swe(df, num_list):
    """Return a copy of ``df`` with lagged versions of its ``mean_swe`` column.

    For every ``num`` in ``num_list`` a column named ``mean_swe_lag_{num}`` is
    added, holding ``df["mean_swe"].shift(num)``: the value from ``num`` rows
    earlier, with missing values in the first ``num`` rows. The shift is by
    row position, not by calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``mean_swe`` column.
    num_list : iterable of int
        Row offsets to lag by.

    Returns
    -------
    pandas.DataFrame
        New frame; the caller's ``df`` is left untouched so that re-running a
        cell that calls this function always starts from the same input.
        A lag column that already exists is overwritten in its current position;
        new ones are appended in ``num_list`` order.

    Raises
    ------
    KeyError
        If ``df`` has no ``mean_swe`` column.
    """
    swe = df["mean_swe"]
    lagged_columns = {f"mean_swe_lag_{num}": swe.shift(num) for num in num_list}
    # assign() builds a new frame instead of writing into the caller's object.
    return df.assign(**lagged_columns)

def load_UA(huc, bucket="snowml-model-ready"):
    """Load the existing model-ready file for one HUC, indexed by day.

    Reads ``model_ready_huc{huc}.csv`` from ``bucket`` through
    ``du.s3_to_df`` and moves the ``day`` column into the index.

    Parameters
    ----------
    huc : str or int
        HUC identifier interpolated into the file name.
    bucket : str, optional
        Bucket to read from. Defaults to the value that was previously
        hard-coded, so existing ``load_UA(huc)`` calls behave as before.

    Returns
    -------
    tuple
        ``(df_UA, col_order)`` where ``col_order`` is the list of column
        labels of ``df_UA`` after ``day`` has become the index.
    """
    file_name = f"model_ready_huc{huc}.csv"
    # presumably s3_to_df returns a pandas DataFrame with a "day" column -- confirm in data_utils
    df_UA = du.s3_to_df(file_name, bucket).set_index("day")
    col_order = list(df_UA.columns)
    return df_UA, col_order

def load_UCLA(huc, bucket="snowml-gold"):
    """Load the UCLA mean-SWE file for one HUC, indexed by day.

    Reads ``mean_swe_ucla_2_in_{huc}.csv`` from ``bucket`` through
    ``du.s3_to_df``, moves the ``day`` column into the index and renames the
    ``SWE_Post`` column to ``mean_swe``.

    Parameters
    ----------
    huc : str or int
        HUC identifier interpolated into the file name.
    bucket : str, optional
        Bucket to read from. Defaults to the value that was previously
        hard-coded, so existing ``load_UCLA(huc)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with ``day`` as index and ``SWE_Post`` renamed.
    """
    file_name = f"mean_swe_ucla_2_in_{huc}.csv"
    # presumably s3_to_df returns a pandas DataFrame with a "day" column -- confirm in data_utils
    df_UCLA = du.s3_to_df(file_name, bucket)
    return df_UCLA.set_index("day").rename(columns={"SWE_Post": "mean_swe"})

def process_one_huc(huc, out_bucket="snowml-model-ready"):
    """Rebuild one HUC's model-ready table with UCLA SWE and upload it.

    Steps:
      1. Load the existing model-ready frame and remember its column order.
      2. Drop every column whose name contains "swe" from it.
      3. Load the UCLA frame (which supplies ``mean_swe``) and inner-join the
         remaining model-ready columns onto it by index.
      4. Recompute the 7/30/60-row lag columns from the new ``mean_swe``.
      5. Re-select the columns in the original order and upload the result.

    Parameters
    ----------
    huc : str or int
        HUC identifier, passed to the loaders and used in the output name.
    out_bucket : str, optional
        Bucket the result is written to. Defaults to the value that was
        previously hard-coded, so ``process_one_huc(huc)`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        The frame that was uploaded.

    Raises
    ------
    KeyError
        If a column listed in the original column order is missing after the
        join and lag steps. Errors from the loaders / uploader propagate.
    """
    df_UA, col_order = load_UA(huc)
    df_model_slim = drop_swe_columns(df_UA)
    df_UCLA = load_UCLA(huc)

    # Inner join: only index values present in both frames survive.
    df_model = df_UCLA.join(df_model_slim, how="inner")

    # The lags are fixed rather than parameterized because the column
    # selection below uses the original file's column order; the sample
    # output in this notebook shows lag_7 / lag_30 / lag_60 in that order.
    lags = [7, 30, 60]
    df_model = add_lagged_swe(df_model, lags)

    # Restores the original layout and drops any column not in it.
    df_model_final = df_model[col_order]

    # No extension here: the upload log in this notebook reports the file
    # as "model_ready_huc<huc>_ucla.csv".
    f_out = f"model_ready_huc{huc}_ucla"
    du.dat_to_s3(df_model_final, out_bucket, f_out, file_type="csv")
    return df_model_final
    
  

In [3]:
# Single HUC id used to try process_one_huc once before the batch runs further down.
huc_id = "171100051002"

In [4]:
# Trial run on one HUC. Note that process_one_huc also uploads the result,
# so re-running this cell rewrites the output file.
df_model = process_one_huc(huc_id)
# Quick sanity check: overall shape, then the first two rows.
print(df_model.shape)
df_model.head(2)

File model_ready_huc171100051002_ucla.csv successfully uploaded to snowml-model-ready
(13149, 12)


Unnamed: 0_level_0,mean_pr,mean_tair,mean_vs,mean_srad,mean_hum,mean_swe,Mean Elevation,Predominant Snow,Mean Forest Cover,mean_swe_lag_7,mean_swe_lag_30,mean_swe_lag_60
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1984-10-01,0.0,8.1625,2.175,144.5,0.660562,0.0,1204.386841,Maritime,45.459721,,,
1984-10-02,1.95,10.24375,1.475,121.85,0.706625,0.0,1204.386841,Maritime,45.459721,,,


In [5]:
# Read the HUC-list JSON; the path is relative to the notebook's location.
# The file is indexed by list name below ("maritime_hucs").
f = "../../src/snowML/datapipe/huc_lists/MandM_hucs_.json"
with open(f, 'r', encoding='utf-8') as file:
    data = json.load(file)

hucs_mar = data["maritime_hucs"]
len(hucs_mar)

154

In [6]:
# Alias of the full maritime list: same list object, no subsetting applied here.
hucs_mar_small = hucs_mar
# Note from an earlier run: df_errors was ['171100050602']
# (the original note was cut off after this id; the list may have been longer)

In [9]:
# "tum" HUC ids. Per the original note, processing these produced no errors.
# Entries are grouped by their first ten digits; order is unchanged.
tum_hucs = [
    '180400090101', '180400090102', '180400090103', '180400090104',
    '180400090105', '180400090106', '180400090107',
    '180400090201', '180400090202',
    '180400090301', '180400090302', '180400090303',
    '180400090401', '180400090402', '180400090403', '180400090404',
    '180400090501', '180400090502', '180400090503', '180400090504',
    '180400090601',
    '180400090801',
]

In [11]:
# HUC ids stored under the "montane_hucs" key of the JSON loaded earlier into `data`.
hucs_mon = data["montane_hucs"]
len(hucs_mon)

187

In [12]:
# Process every montane HUC, collecting failures instead of aborting the batch.
# `df_errors` is (despite its name) a plain list of the HUC ids that failed.
df_errors = []
# Failure reason per HUC id, so the causes can be inspected after the run.
huc_error_msgs = {}
for huc in hucs_mon:
    try:
        process_one_huc(huc)
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the loop.
        df_errors.append(huc)
        huc_error_msgs[huc] = f"{type(exc).__name__}: {exc}"
print(df_errors)

['170200090208', '170200090209', '170200090301', '170200090302', '170200090304', '170200110406', '170200110501', '170200110502', '170200110503', '170200110601', '170200110602', '170200110603', '170200110704', '171100050402', '171100050404', '171100050501', '170300010201', '170300010204', '170300010205', '170300010401', '170300010402', '170300010501', '170300010503', '170300010504', '170300010508', '170300010509', '170300010601', '170300010602', '170300020108', '170300020109', '170300020201', '170300020202', '170300020203', '170300020205', '170300020206', '170300020306', '170300020307', '170300020308', '170300020309', '170300030101', '170300030102', '170300030103', '170300030602', '170200100101', '170200100102', '170200100103', '170200100204', '170200100205', '170200100208', '170200100305', '170200100309', '170200100310', '170200100402', '170103040101', '170103040102', '170103040103', '170103040104', '170103040105', '170103040201', '170103040202', '170103040203', '170103040301', '170103