# Notebook to verify ucla swe data 

In [1]:
import pandas as pd
import xarray as xr 
from snowML.datapipe.utils import data_utils as du 
from snowML.Scripts.load_hucs import load_huc_splits as lhs

# Load an example file 

In [2]:
b = "snowml-gold" 
f = "mean_swe_ucla_2_in_171100050805.csv"
df = du.s3_to_df(f, b)
print(df.shape[0])
df.head()

13149


Unnamed: 0,day,SWE_Post
0,1984-10-01,0.0
1,1984-10-02,0.0
2,1984-10-03,0.0
3,1984-10-04,0.0
4,1984-10-05,2.5e-05


In [3]:
def check_day_issues(huc_list):
    b = "snowml-gold"
    hucs_with_missing_days = []
    hucs_with_duplicate_days = []
    hucs_w_no_df_found = []
    hucs_w_unspecified_error = []

    for huc in huc_list:
        f = f"mean_swe_ucla_2_in_{huc}.csv"
        try:
            df = du.s3_to_df(f, b)
        except Exception:
            hucs_w_no_df_found.append(huc)
            continue

        # Make a copy and ensure 'day' is datetime
        try: 
            df_copy = df.copy()
            df_copy['day'] = pd.to_datetime(df_copy['day'])

            # Check for duplicate days
            has_duplicates = df_copy['day'].duplicated().any()

            # Check for missing days
            full_range = pd.date_range(start=df_copy['day'].min(), end=df_copy['day'].max())
            unique_days = df_copy['day'].drop_duplicates()
            has_missing = len(unique_days) != len(full_range)

            if has_missing:
                hucs_with_missing_days.append(huc)
            if has_duplicates:
                hucs_with_duplicate_days.append(huc)
        except: 
            hucs_w_unspecified_error.append(huc)

    return {
        "hucs_with_missing_days": hucs_with_missing_days,
        "hucs_with_duplicate_days": hucs_with_duplicate_days,
        "hucs_w_no_df_found": hucs_w_no_df_found, 
        "hucs_w_unspecified_error": hucs_w_unspecified_error
    }



In [4]:
check_day_issues([171100050805, 54])

{'hucs_with_missing_days': [],
 'hucs_with_duplicate_days': [],
 'hucs_w_no_df_found': [54],
 'hucs_w_unspecified_error': []}

# Run on All 

In [5]:
f = "../../src/snowML/datapipe/huc_lists/MarMultiSplits.json"

In [6]:
m1, m2, m3  = lhs.huc_split(f)

In [7]:
all = m1+m2+m3

In [8]:
issues_dict = check_day_issues(all)
issues_dict

{'hucs_with_missing_days': ['170300010105',
  '171100060102',
  '170200110203',
  '171100060106',
  '170200090107',
  '171100090201',
  '171100060204',
  '171100050803',
  '170200110201'],
 'hucs_with_duplicate_days': [],
 'hucs_w_no_df_found': [],
 'hucs_w_unspecified_error': ['171100080201',
  '170300020304',
  '171100050603',
  '171100080204',
  '171100060103',
  '170200090204',
  '171100090203',
  '170200090106',
  '171100060302',
  '171100050602']}

In [9]:
len(issues_dict["hucs_w_no_df_found"])

0

In [10]:
redo_list = ['170300010105',
  '171100060106',
  '171100060304',
  '170200110201',
  '171100050605']

In [11]:
error_ls = ['171100050602', '170200110201']

In [12]:
ls = ['170300010105_2016', '170300010105_2018', '170300010105_2020', '171100090502_1984', '170300020101_1996', '170300020101_1997', '170300020101_1998', '170300020101_1999', '170300020101_2000', '170300020101_2001', '170300020101_2002', '170300020101_2003', '170300020101_2004', '170300020101_2005', '170300020101_2006', '170300020101_2007', '170300020101_2008', '170300020101_2009', '170300020101_2010', '170300020101_2011', '170300020101_2012', '170300020101_2013', '170300020101_2014', '170300020101_2015', '170300020101_2016', '170300020101_2017', '170300020101_2018', '170300020101_2019', '170300020101_2020', '170300020103_1984', '170300020103_1985', '170300020103_1986', '170300020103_1987', '170300020103_1988', '170300020103_1989', '170300020103_1990', '170300020103_1991', '170300020103_1992', '170300020103_1993', '170300020103_1994', '170300020103_1995', '170300020103_1996', '170300020103_1997', '170300020103_1998', '170300020103_1999', '170300020103_2000', '170300020103_2001', '170300020103_2002', '170300020103_2003', '170300020103_2004', '170300020103_2005', '170300020103_2006', '170300020103_2007', '170300020103_2008', '170300020103_2009', '170300020103_2010', '170300020103_2011', '170300020103_2012', '170300020301_2009', '170300020301_2010', '170300020301_2011', '170300020301_2012', '170300020301_2013', '170300020301_2014', '170300020301_2015', '170300020301_2016', '170300020301_2017', '170300020301_2018', '170300020301_2019', '170300020301_2020', '171100080201_1984', '171100080201_1985', '171100080201_1986', '171100080201_1987', '171100080201_1988', '171100080201_1989', '171100080201_1990', '171100080201_1991', '171100080201_1992', '171100080201_1993', '171100080201_1994', '171100080201_1995', '171100080201_1996', '171100080201_1997', '171100080201_1998', '171100080201_1999', '171100080201_2000', '171100080201_2001', '171100080201_2002', '171100080201_2003', '171100080201_2004', '171100080201_2005', '171100080201_2006', '171100080201_2007', '171100080201_2008', '171100080201_2009', '171100080201_2010', '171100080201_2011', '171100080201_2012', '171100080201_2013', '171100080201_2014', '171100080201_2015', '171100080201_2016', '171100080201_2017', '171100080201_2018', '171100080201_2019', '171100080201_2020', '170300020304_1984', '170300020304_1985', '170300020304_1986', '170300020304_1987', '170300020304_1988', '170300020304_1989', '170300020304_1990', '170300020304_1991', '170300020304_1992', '170300020304_1993', '170300020304_1994', '170300020304_1995', '170300020304_1996', '170300020304_1997', '170300020304_1998', '170300020304_1999', '170300020304_2000', '170300020304_2001', '170300020304_2002', '170300020304_2003', '170300020304_2004', '170300020304_2005', '170300020304_2006', '170300020304_2007', '170300020304_2008', '170300020304_2009', '170300020304_2010', '170300020304_2011', '170300020304_2012', '170300020304_2013', '170300020304_2014', '170300020304_2015', '170300020304_2016', '170300020304_2017', '170300020304_2018', '170300020304_2019', '170300020304_2020', '171100050603_1984', '171100050603_1985', '171100050603_1986', '171100050603_1987', '171100050603_1988', '171100050603_1989', '171100050603_1990', '171100050603_1991', '171100050603_1992', '171100050603_1993', '171100050603_1994', '171100050603_1995', '171100050603_1996', '171100050603_1997', '171100050603_1998', '171100050603_1999', '171100050603_2000', '171100050603_2001', '171100050603_2002', '171100050603_2003', '171100050603_2004', '171100050603_2005', '171100050603_2006', '171100050603_2007', '171100050603_2008', '171100050603_2009', '171100050603_2010', '171100050603_2011', '171100050603_2012', '171100050603_2013', '171100050603_2014', '171100050603_2015', '171100050603_2016', '171100050603_2017', '171100050603_2018', '171100050603_2019', '171100050603_2020', '171100080204_1984', '171100080204_1985', '171100080204_1986', '171100080204_1987', '171100080204_1988', '171100080204_1989', '171100080204_1990', '171100080204_1991', '171100080204_1992', '171100080204_1993', '171100080204_1994', '171100080204_1995', '171100080204_1996', '171100080204_1997', '171100080204_1998', '171100080204_1999', '171100080204_2000', '171100080204_2001', '171100080204_2002', '171100080204_2003', '171100080204_2004', '171100080204_2005', '171100080204_2006', '171100080204_2007', '171100080204_2008', '171100080204_2009', '171100080204_2010', '171100080204_2011', '171100080204_2012', '171100080204_2013', '171100080204_2014', '171100080204_2015', '171100080204_2016', '171100080204_2017', '171100080204_2018', '171100080204_2019', '171100080204_2020', '171100060103_1984', '171100060103_1985', '171100060103_1986', '171100060103_1987', '171100060103_1988', '171100060103_1989', '171100060103_1990', '171100060103_1991', '171100060103_1992', '171100060103_1993', '171100060103_1994', '171100060103_1995', '171100060103_1996', '171100060103_1997', '171100060103_1998', '171100060103_1999', '171100060103_2000', '171100060103_2001', '171100060103_2002', '171100060103_2003', '171100060103_2004', '171100060103_2005', '171100060103_2006', '171100060103_2007', '171100060103_2008', '171100060103_2009', '171100060103_2010', '171100060103_2011', '171100060103_2012', '171100060103_2013', '171100060103_2014', '171100060103_2015', '171100060103_2016', '171100060103_2017', '171100060103_2018', '171100060103_2019', '171100060103_2020', '170200090204_1984', '170200090204_1985', '170200090204_1986', '170200090204_1987', '170200090204_1988', '170200090204_1989', '170200090204_1990', '170200090204_1991', '170200090204_1992', '170200090204_1993', '170200090204_1994', '170200090204_1995', '170200090204_1996', '170200090204_1997', '170200090204_1998', '170200090204_1999', '170200090204_2000', '170200090204_2001', '170200090204_2002', '170200090204_2003', '170200090204_2004', '170200090204_2005', '170200090204_2006', '170200090204_2007', '170200090204_2008', '170200090204_2009', '170200090204_2010', '170200090204_2011', '170200090204_2012', '170200090204_2013', '170200090204_2014', '170200090204_2015', '170200090204_2016', '170200090204_2017', '170200090204_2018', '170200090204_2019', '170200090204_2020', '171100090203_1984', '171100090203_1985', '171100090203_1986', '171100090203_1987', '171100090203_1988', '171100090203_1989', '171100090203_1990', '171100090203_1991', '171100090203_1992', '171100090203_1993', '171100090203_1994', '171100090203_1995', '171100090203_1996', '171100090203_1997', '171100090203_1998', '171100090203_1999', '171100090203_2000', '171100090203_2001', '171100090203_2002', '171100090203_2003', '171100090203_2004', '171100090203_2005', '171100090203_2006', '171100090203_2007', '171100090203_2008', '171100090203_2009', '171100090203_2010', '171100090203_2011', '171100090203_2012', '171100090203_2013', '171100090203_2014', '171100090203_2015', '171100090203_2016', '171100090203_2017', '171100090203_2018', '171100090203_2019', '171100090203_2020', '170200090106_1984', '170200090106_1985', '170200090106_1986', '170200090106_1987', '170200090106_1988', '170200090106_1989', '170200090106_1990', '170200090106_1991', '170200090106_1992', '170200090106_1993', '170200090106_1994', '170200090106_1995', '170200090106_1996', '170200090106_1997', '170200090106_1998', '170200090106_1999', '170200090106_2000', '170200090106_2001', '170200090106_2002', '170200090106_2003', '170200090106_2004', '170200090106_2005', '170200090106_2006', '170200090106_2007', '170200090106_2008', '170200090106_2009', '170200090106_2010', '170200090106_2011', '170200090106_2012', '170200090106_2013', '170200090106_2014', '170200090106_2015', '170200090106_2016', '170200090106_2017', '170200090106_2018', '170200090106_2019', '170200090106_2020', '171100060302_1984', '171100060302_1985', '171100060302_1986', '171100060302_1987', '171100060302_1988', '171100060302_1989', '171100060302_1990', '171100060302_1991', '171100060302_1992', '171100060302_1993', '171100060302_1994', '171100060302_1995', '171100060302_1996', '171100060302_1997', '171100060302_1998', '171100060302_1999', '171100060302_2000', '171100060302_2001', '171100060302_2002', '171100060302_2003', '171100060302_2004', '171100060302_2005', '171100060302_2006', '171100060302_2007', '171100060302_2008', '171100060302_2009', '171100060302_2010', '171100060302_2011', '171100060302_2012', '171100060302_2013', '171100060302_2014', '171100060302_2015', '171100060302_2016', '171100060302_2017', '171100060302_2018', '171100060302_2019', '171100060302_2020', '171100060102_1984', '171100060102_1985', '171100060102_1986', '171100060102_1987', '171100060102_1988', '171100060102_1989', '171100060102_1990', '171100060102_1991', '171100060102_1992', '171100060102_1993', '171100060102_1994', '171100060102_1995', '171100060102_2003', '170200110203_2007', '170200110203_2010', '170200110203_2011', '171100050904_2020', '171100060106_2004', '170200090107_1994', '171100090201_2015', '171100060204_2004', '171100050803_2001']

In [13]:
len(ls)

425

In [14]:
unique_hucs = set(item.split('_')[0] for item in ls)
print(len(unique_hucs))
unique_hucs

22


{'170200090106',
 '170200090107',
 '170200090204',
 '170200110203',
 '170300010105',
 '170300020101',
 '170300020103',
 '170300020301',
 '170300020304',
 '171100050603',
 '171100050803',
 '171100050904',
 '171100060102',
 '171100060103',
 '171100060106',
 '171100060204',
 '171100060302',
 '171100080201',
 '171100080204',
 '171100090201',
 '171100090203',
 '171100090502'}

In [16]:
ls = list(unique_hucs)
ls

['171100080201',
 '171100060204',
 '171100090201',
 '170300010105',
 '171100060102',
 '171100050803',
 '170200090106',
 '171100060302',
 '171100060106',
 '171100090203',
 '171100080204',
 '170300020304',
 '171100090502',
 '170200110203',
 '170200090204',
 '171100050904',
 '170300020101',
 '170300020103',
 '170300020301',
 '170200090107',
 '171100060103',
 '171100050603']

In [None]:
ls0 = ['171100080201']
ls1 = ['171100060204', '171100090201', '170300010105', '171100060102', '171100050803', '170200090106', '171100060302', '171100060106']
ls2 = ['171100090203','171100080204','170300020304','171100090502','170200110203','170200090204','171100050904']
ls3 = ['170300020101','170300020103','170300020301','170200090107','171100060103','171100050603']