In [None]:
# Import Packages
import os
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
from datetime import datetime, date

In [None]:
# Change Directory
# Move into the SWE data folder. The guard keeps the cell re-runnable: without it a
# second execution would look for 'swe_data/' inside 'swe_data/' and fail.
# (`os` is already imported in the imports cell at the top of the notebook.)
if os.path.basename(os.getcwd()) != 'swe_data':
    os.chdir('swe_data/')
os.listdir()

## Function Used in Later Code

In [76]:
### 2. Function for Extracting Index
def index_finder(lon, lat):
    """Convert a longitude/latitude pair into integer grid indices.

    Each index is the offset of the coordinate from the grid origin
    (longitude -123.3, latitude 35.4) multiplied by 1000 and rounded to the
    nearest integer.

    Parameters
    ----------
    lon : float
        Longitude; accepted range is [-123.3, -117.6].
    lat : float
        Latitude; accepted range is [35.4, 42].

    Returns
    -------
    tuple of (int, int) or None
        ``(lon_idx, lat_idx)`` when both coordinates are inside the accepted
        range. ``None`` (after printing a message) when either coordinate is
        outside the range or is NaN.
    """
    lon_min, lon_max = -123.3, -117.6
    lat_min, lat_max = 35.4, 42
    steps_per_unit = 1000

    # `not (a <= x <= b)` is also True for NaN, so missing coordinates are
    # reported here instead of reaching round(), which raises on NaN.
    if not (lon_min <= lon <= lon_max):
        print('Longitude of Input is out of range! lon:', lon)
        return None
    if not (lat_min <= lat <= lat_max):
        print('Latitude of Input is out of range! lat:', lat)
        return None

    # Longitude and latitude are both within range
    lon_idx = round((lon - lon_min) * steps_per_unit)
    lat_idx = round((lat - lat_min) * steps_per_unit)
    return int(lon_idx), int(lat_idx)

## Read Data

In [None]:
# Discharge Data
gage = pd.read_csv('../gage_discharge_lat_lon.csv')
# Placeholder for rows whose SWE has not been filled in yet. Stored as a float
# because later cells write floating-point region averages into this column;
# starting from an integer column would force a dtype upcast on first assignment.
gage['swe'] = -1.0

In [None]:
# Preview the first rows of the gage table
gage.head()

In [None]:
# Variable Defining
# Year of the SWE file that was loaded most recently. Later cells compare this
# with the current row's year and only open a file when the two differ; the
# initial 0 never equals a real year, so the first comparison triggers a load.
prev_year = 0

In [None]:
### Pick a sample row: the first row of gage 11189500 whose date is in 2015
# Vectorised search instead of a Python loop over every row. It also fails
# loudly when nothing matches, rather than silently leaving the last row selected.
is_target_row = gage['time'].str[0:4].eq('2015') & gage['gage'].eq(11189500)
if not is_target_row.any():
    raise LookupError('No row found for gage 11189500 in year 2015')

# Positional index of the first match (used with .iloc here and by later cells)
row_num = int(np.flatnonzero(is_target_row.to_numpy())[0])
row_data = gage.iloc[row_num,:]
row_time = row_data['time']

In [None]:
# Display the gage row picked out by the preceding search cell
row_data

In [None]:
# ---------- Inside For Loop -----------

In [None]:
### Extract Date from Gage Data to match SWE
date_format = "%Y-%m-%d"
d_date = datetime.strptime(row_time, date_format)
d_year = d_date.year

# Zero-based offset, in days, of the row's date from January 1 of the same
# calendar year; used below as the index into the daily SWE array.
# NOTE(review): the SWE file names contain "WY"; if that means water year, the
# first entry of the array may not be January 1 -- confirm against the dataset.
year_start = datetime(d_year, 1, 1)
num_days = (d_date - year_start).days

print(f'Year: {d_year}  Day: {num_days}')



In [None]:
## Obtain swe data
# Open a SWE file only when the current row's year differs from the year of the
# file that is already loaded.
if prev_year != d_year:
    # Close the handle left over from a previously loaded year (if any) so that
    # open HDF5 files do not accumulate as the year changes.
    previous_file = globals().get('swe')
    if previous_file is not None:
        previous_file.close()
    swe = h5py.File(f'SN_SWE_WY{d_year}.h5', 'r')
    # First row of 'lat' reversed, and first column of 'lon'
    lat = swe['lat'][0][::-1]
    lon = swe['lon'][:,0]
    # NOTE(review): lats/lons are not used by the cells visible here; keep only
    # if a later cell needs the full coordinate grids.
    lats,lons = np.meshgrid(lat,lon)
prev_year = d_year

# Select the grid at position num_days along the first axis of the 'SWE' dataset
swe_data = swe['SWE'][num_days]
# flip over yaxis as lats are in a descending order --> need to change to ascending order
swe_data_flip = swe_data[:,::-1]

## Find the SWE Region of Interest from Lat/Lon

In [80]:
### 4. Find closest idx to the lower left & upper right corner
ll_lon_idx, ll_lat_idx = index_finder(row_data['ll_lon'], row_data['ll_lat'])
tr_lon_idx, tr_lat_idx = index_finder(row_data['tr_lon'], row_data['tr_lat'])

# Cut the bounding box out of the flipped grid: the first axis is sliced with the
# longitude indices and the second axis with the latitude indices.
lon_span = slice(ll_lon_idx, tr_lon_idx)
lat_span = slice(ll_lat_idx, tr_lat_idx)
region = swe_data_flip[lon_span, lat_span]


4916


In [78]:
# Report the dimensions of the selected region
print('Region Shape:', region.shape)
# Report every distinct value in the region together with how often it occurs
distinct_vals, occurrences = np.unique(region, return_counts=True)
print('Unique Value:', (distinct_vals, occurrences))
region

Region Shape: (380, 709)
Unique Value: (array([-32768,      0], dtype=int16), array([ 39327, 230093]))


array([[-32768, -32768, -32768, ...,      0,      0,      0],
       [-32768, -32768, -32768, ...,      0,      0,      0],
       [-32768, -32768, -32768, ...,      0,      0,      0],
       ...,
       [     0,      0,      0, ..., -32768, -32768, -32768],
       [     0,      0,      0, ..., -32768, -32768, -32768],
       [-32768, -32768,      0, ..., -32768, -32768, -32768]], dtype=int16)

In [None]:
# Exploration notes kept as a bare string literal (the cell shows the text when run).
# They record, per gage, how many valid and n/a SWE cells were found in the
# gage's region under the old and the new coordinates.
'''
 ---- OLD COORDINATES ---
Gage with swe values: 11266500 
Gage with no swe values: 11402000, 11318500,11208000
Gage with certain swe values: 11185500
Gage with limited swe values: 11189500, 11202710
Note: Gage 11202710 starts from 1988 while others start from 1985

 ---- SWE on NEW COORDINATES (10/15) ----
 11202710: 40,904 valid & 6616 n/a values from region of 198x240 array
 11266500: 126,687 valid & 2784 n/a values from region of 419x309 array
 11402000: 36,414 valid & 54,651 n/a values from region of 467x195 array
 11318500: 9510 valid & 29072 n/a values from region of 382x101 array
 11208000: 29,403 valid & 3461 n/a values from region of 208x158 array
 11185500: 348,392 valid & 21336 n/a values from region of 436x848 array
 11189500: 230,093 valid & 39327 n/a values from region of 380x709 array
 --------------------------------------------------
'''

In [None]:
### Obtain Value of Interested Region

# Replace the fill value -32768 (no data) with NaN so it is left out of the mean.
# The cast to float is required because NaN cannot be stored in an integer array.
region = region.astype('float')
region[region == -32768] = np.nan

# Mean over the valid cells; NaN when the region contains no valid cell at all
region_avg = np.nanmean(region)

# NaN never compares equal to anything, itself included, so `region_avg == np.nan`
# is always False; np.isnan is the correct test.
if np.isnan(region_avg):
    # NOTE(review): the main pipeline cell writes -2 for this case; this
    # exploratory cell keeps its original -1.
    gage.loc[row_num,'swe'] = -1
else: # region avg is not null
    gage.loc[row_num,'swe'] = region_avg

# Pipeline: Data Joining Between Gage Time Series Data & SWE Value
- Match SWE data to each gage time-series row using the row's date and the lat/lon bounding box (`ll_*` / `tr_*` corners) of the gage's region
- **main code**

In [None]:
# (Stray, incomplete `class j` statement removed: it raised a SyntaxError on
# Run All, and nothing in the visible notebook refers to `j`.)

In [None]:
# gage = pd.read_csv('../gage_swe.csv')
date_format = "%Y-%m-%d"

SWE_FILL_VALUE = -32768             # value in the SWE grid that marks a cell without data
NO_DATA_FLAG = -2                   # written to gage['swe'] when a region has no valid SWE cell
START_ROW = 0                       # raise this to resume a partially completed run
LOG_EVERY = 100                     # print a progress line every LOG_EVERY rows
SAVE_EVERY = 1000                   # write a checkpoint CSV every SAVE_EVERY rows
OUTPUT_CSV = '../gage_with_swe.csv'

# Regions, keyed by their four corner indices, that were already found to hold no
# valid SWE cell. Every later row with the same region is flagged without
# reading the SWE file again.
empty_regions = set()

# Region averages are floats; make the column float up front so that writing them
# does not depend on an implicit dtype upcast.
gage['swe'] = gage['swe'].astype('float64')
swe_col = gage.columns.get_loc('swe')

# Start with no SWE file loaded so this cell does not depend on state left behind
# by the exploratory cells above.
prev_year = None
swe = None
total_rows = len(gage)

try:
    ### Run through all data
    for row_num in range(START_ROW, total_rows):

        if row_num % LOG_EVERY == 0:
            print(f'-------- Processing Row Number {row_num} out of {total_rows} ---------')

        # Start with each row
        row_data = gage.iloc[row_num,:]
        row_time = row_data['time']

        # Only rows dated 1985-2016 are matched against SWE files
        if 1984 < int(row_time[0:4]) < 2017:

            ### Obtain the grid indices of the region's two corners
            ll_idx = index_finder(row_data['ll_lon'], row_data['ll_lat'])
            tr_idx = index_finder(row_data['tr_lon'], row_data['tr_lat'])

            # index_finder returns None (and prints why) for out-of-range
            # coordinates; such rows keep their initial 'swe' value.
            if ll_idx is not None and tr_idx is not None:
                ll_lon_idx, ll_lat_idx = ll_idx
                tr_lon_idx, tr_lat_idx = tr_idx
                region_key = (ll_lon_idx, ll_lat_idx, tr_lon_idx, tr_lat_idx)

                if region_key in empty_regions:
                    gage.iat[row_num, swe_col] = NO_DATA_FLAG
                else:
                    ### Extract Date from Gage Data to match SWE
                    d_date = datetime.strptime(row_time, date_format)
                    d_year = d_date.year
                    # Zero-based day offset from January 1 of the same calendar year.
                    # NOTE(review): the file names contain "WY"; if that means water
                    # year, index 0 of the SWE array may not be January 1 -- confirm.
                    num_days = (d_date - datetime(d_year, 1, 1)).days

                    ## Obtain swe data
                    # Open a new file only when the year changes, closing the
                    # previous handle first so open files do not accumulate.
                    if prev_year != d_year:
                        if swe is not None:
                            swe.close()
                        swe = h5py.File(f'SN_SWE_WY{d_year}.h5', 'r')
                        lat = swe['lat'][0][::-1]
                        lon = swe['lon'][:,0]
                        # NOTE(review): lats/lons are not used in this cell; keep
                        # only if a later cell needs the full coordinate grids.
                        lats,lons = np.meshgrid(lat,lon)
                        prev_year = d_year

                    swe_data = swe['SWE'][num_days]
                    # flip over yaxis as lats are in a descending order --> need to change to ascending order
                    swe_data_flip = swe_data[:,::-1]

                    # get SWE values of the region spanned by the two corners
                    region = swe_data_flip[ll_lon_idx:tr_lon_idx,ll_lat_idx:tr_lat_idx]

                    # change the fill value (no data) to NaN so it is excluded from the mean
                    region = region.astype('float')
                    region[region == SWE_FILL_VALUE] = np.nan

                    ### Select way to process the region data into a single value or a k-dimension vector
                    region_avg = np.nanmean(region)
                    #########################

                    if pd.isna(region_avg): # no valid cell in the region
                        gage.iat[row_num, swe_col] = NO_DATA_FLAG
                        empty_regions.add(region_key)
                    else: # region avg is not null, assign the value with average
                        gage.iat[row_num, swe_col] = region_avg

        if row_num % SAVE_EVERY == 0:
            gage.to_csv(OUTPUT_CSV,index=False)
            print(f'---- Round {row_num} Save to CSV file')
finally:
    # Release the last open SWE file, also when the loop is interrupted, and
    # forget the loaded year so a later cell reopens the file instead of reading
    # from a closed handle.
    if swe is not None:
        swe.close()
    prev_year = None

In [None]:
# Save the gage table, including its 'swe' column, to CSV.
# index=False leaves the DataFrame's row index out of the file.
gage.to_csv('../gage_with_swe.csv',index=False)

In [None]:
# Distribution of the 'swe' values from row label 80000 onward.
# The comma separates the row slice from the column label; without it,
# `80000:'swe'` is read as a row slice that ends at the label 'swe'.
gage.loc[80000:, 'swe'].value_counts()

In [None]:
# Legend for the values stored in gage['swe'], kept as a bare string literal
# (the cell shows the text when run).
'''
## SWE value indication
Initial Assigned Value: -1
N/A value: -2
Rest are normal values
'''

In [None]:
# ---------------------------