In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import sys
from datetime import datetime
sys.path.append('../../')

from spicy_snow.processing.snow_index import calc_delta_cross_ratio, calc_delta_gamma, \
    clip_delta_gamma_outlier, calc_snow_index, calc_snow_index_to_snow_depth
from spicy_snow.processing.wet_snow import id_newly_wet_snow, id_wet_negative_si, \
    id_newly_frozen_snow, flag_wet_snow

np.random.seed(0)

In [9]:
# Import locally so this cell does not depend on a later cell having been run.
import time

# Current Unix time in whole seconds.
int(time.time())

1681795189

In [6]:
import time

# Seed NumPy's global RNG from the wall clock, but keep the value so the
# random draws that follow can be reproduced later by re-seeding with it.
# This replaces any seed set earlier in the session.
split_seed = int(time.time())
np.random.seed(split_seed)
split_seed

# separate out training and validation datasets

In [5]:
# Randomly split every lidar/S1 stack into a training and a validation file.
# Masking approach adapted from:
# https://stackoverflow.com/questions/56257429/randomly-mask-set-nan-x-of-data-points-in-huge-xarray-dataarray

train_dir = Path('../../data/bootstrap/training')
val_dir = Path('../../data/bootstrap/validation')

train_dir.mkdir(parents = True, exist_ok = True)
val_dir.mkdir(parents = True, exist_ok = True)

for fp in Path('../../Lidar_s1_stacks').glob('*.nc'):
    # The context manager closes the source file once both subsets are
    # written, so handles do not pile up across the loop.
    with xr.open_dataset(fp) as ds:
        # Boolean mask on the 'fcf' grid: True where a uniform draw is < 0.2.
        # Built from that single variable rather than from a zeroed copy of
        # the whole dataset.
        template = ds['fcf']
        mask = template.copy(data = np.random.rand(*template.shape) < 0.2).rename('mask')
        mask = mask.broadcast_like(ds['s1'])

        # Cells where the mask is True are kept in `val` and blanked in
        # `train`, and vice versa.
        train = ds.where(~mask)
        val = ds.where(mask)

        train.to_netcdf(train_dir.joinpath(fp.name.replace('.nc','.train.nc')))
        val.to_netcdf(val_dir.joinpath(fp.name.replace('.nc','.val.nc')))

KeyboardInterrupt: 

# loop through the .nc files, resample, and calculate all possible retrieved snow depths

In [None]:
# np.random.seed(0)
# site_dir = Path('/Users/zachkeskinen/Documents/spicy-snow/scripts/optimize/param_sds/Mores_2021-03-15')
# ds = xr.open_dataset(next(site_dir.glob('*')))
# idx = ds['lidar-sd'].rename('mask')
# idx.data = np.random.rand(*idx.data.shape) < 0.2
# idx = idx.broadcast_like(ds)
# for fp in site_dir.glob('*'):
#     ds = xr.open_dataset(fp)
#     a,b,c = fp.stem.split('_')
#     train = ds.where(~idx)
#     val = ds.where(idx)

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from shapely import wkt
from shapely.geometry import box
from pathlib import Path
from datetime import datetime
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

import sys
sys.path.append('../..')

from spicy_snow.processing.snow_index import calc_delta_cross_ratio, calc_delta_gamma, \
    clip_delta_gamma_outlier, calc_snow_index, calc_snow_index_to_snow_depth
from spicy_snow.processing.wet_snow import id_newly_wet_snow, id_wet_negative_si, \
    id_newly_frozen_snow, flag_wet_snow

# Brute-force sweep: for every lidar/S1 stack, run the retrieval for each
# (A, B, C) combination and save the result at the time step closest to the
# lidar flight as one netCDF file per combination.

# Create parameter space
A = np.round(np.arange(1, 3.1, 0.5), 2)
B = np.round(np.arange(0, 1.01, 0.1), 2)
C = np.round(np.arange(0, 1.001, 0.01), 2)

files = Path('../../Lidar_s1_stacks/').glob('*.nc')
param_dir = Path('~/scratch/params').expanduser()
for f in files:

    # get dataset
    ds_name = f.name.split('stacks/')[-1].split('.')[0]
    print(datetime.now(), f' -- starting {ds_name}')
    # load_dataset reads everything into memory and closes the file,
    # so no handle is left open per site.
    ds_ = xr.load_dataset(f)
    dataset = ds_[['s1','deltaVV','ims','fcf','lidar-sd']]

    # find closest timestep to lidar
    td = abs(pd.to_datetime(dataset.time) - pd.to_datetime(dataset.attrs['lidar-flight-time']))
    closest_ts = dataset.time[np.argmin(td)]

    # One output directory per site. parents/exist_ok let the cell run when
    # the scratch directory is missing and let it be re-run; existing files
    # with the same name are overwritten by to_netcdf below.
    site_dir = param_dir.joinpath(f'{ds_name}')
    site_dir.mkdir(parents = True, exist_ok = True)

    # Brute-force processing loop
    # NOTE(review): `ds` is reassigned at every nesting level, so each b and c
    # iteration receives the dataset returned by the previous iteration rather
    # than a fresh one. Confirm the spicy_snow functions recompute their
    # outputs from their inputs so that earlier iterations cannot leak in.
    for a in tqdm(A):
        ds = calc_delta_cross_ratio(dataset, A = a)
        for b in B:
            ds = calc_delta_gamma(ds, B = b, inplace=False)
            ds = clip_delta_gamma_outlier(ds)
            ds = calc_snow_index(ds)
            ds = id_newly_wet_snow(ds)
            ds = id_wet_negative_si(ds)
            ds = id_newly_frozen_snow(ds)
            ds = flag_wet_snow(ds)
            for c in C:
                ds = calc_snow_index_to_snow_depth(ds, C = c)

                # Keep only what the later RMSE step reads, at the lidar time step.
                sub = ds.sel(time = closest_ts)[['snow_depth', 'wet_snow', 'lidar-sd']]
                sub.to_netcdf(site_dir.joinpath(f'{a}_{b}_{c}.nc'))

2023-04-17 15:48:25.321336  -- starting Mores_2020-02-09


100%|██████████| 5/5 [06:33<00:00, 78.69s/it]


2023-04-17 15:54:58.772782  -- starting Frasier_2021-03-19


100%|██████████| 5/5 [07:18<00:00, 87.68s/it]


2023-04-17 16:02:18.050228  -- starting Dry_Creek_2020-02-19


100%|██████████| 5/5 [08:41<00:00, 104.37s/it]


2023-04-17 16:11:00.581221  -- starting Banner_2021-03-15


100%|██████████| 5/5 [10:31<00:00, 126.38s/it]


2023-04-17 16:21:33.480180  -- starting Little_Cottonwood_2021-03-18


100%|██████████| 5/5 [08:30<00:00, 102.04s/it]


2023-04-17 16:30:04.314760  -- starting Mores_2021-03-15


100%|██████████| 5/5 [08:08<00:00, 97.66s/it] 


2023-04-17 16:38:13.481267  -- starting Banner_2020-02-18


100%|██████████| 5/5 [07:21<00:00, 88.23s/it]


2023-04-17 16:45:35.957895  -- starting Frasier_2020-02-11


100%|██████████| 5/5 [06:33<00:00, 78.74s/it]


2023-04-17 16:52:10.153865  -- starting Cameron_2021-03-19


100%|██████████| 5/5 [07:06<00:00, 85.21s/it]


In [1]:
from sklearn.metrics import mean_squared_error
def get_bootstrap(x, y):
    """Draw one paired bootstrap resample from two co-located arrays.

    Both inputs are flattened, positions where either value is NaN are
    dropped, and the remaining (x, y) pairs are resampled with replacement.
    A single set of random indices is applied to both arrays so that every
    resampled ``x`` stays matched with its own ``y``; resampling them
    independently would scramble the pairing that an error metric such as
    RMSE relies on.

    Parameters
    ----------
    x, y : objects exposing ``.values`` (e.g. ``xarray.DataArray``)
        Must flatten to the same number of elements.

    Returns
    -------
    x_bs, y_bs : numpy.ndarray
        1-D arrays of equal length (the number of valid pairs). Both are
        empty when no position has valid data in both inputs.

    Notes
    -----
    Uses NumPy's global random state, so ``np.random.seed`` controls the draw.
    """
    # ravel to numpy arrays
    x = x.values.ravel()
    y = y.values.ravel()

    # remove positions where either array is NaN
    valid = ~(np.isnan(x) | np.isnan(y))
    x = x[valid]
    y = y[valid]

    n_pairs = len(x)
    if n_pairs == 0:
        # Nothing to resample; return the (empty) arrays instead of raising.
        return x, y

    # bootstrap: one index draw shared by both arrays keeps the pairs intact
    idx = np.random.randint(0, n_pairs, size = n_pairs)

    return x[idx], y[idx]

def calc_rmse(y_actual, y_pred):
    """Return the root-mean-square error between observed and predicted values.

    Parameters
    ----------
    y_actual, y_pred : array-like
        Passed straight to ``sklearn.metrics.mean_squared_error``, which
        validates them.

    Returns
    -------
    float
        Square root of the mean squared error.
    """
    # The `squared=False` keyword is deprecated in scikit-learn 1.4 and removed
    # in 1.6, so take the root explicitly. For 1-D inputs this is the same value.
    rms = mean_squared_error(y_actual, y_pred) ** 0.5
    return rms

In [2]:
# from itertools import product
from tqdm.contrib.itertools import product
import numpy as np
import pandas as pd
import xarray as xr
from pathlib import Path
from tqdm import tqdm

# For each bootstrap seed, fill an (A, B, C) grid of RMSE values and record
# the parameter combination with the lowest RMSE.

# Create parameter space
A = np.round(np.arange(1, 3.1, 0.5), 2)
B = np.round(np.arange(0, 1.01, 0.1), 2)
C = np.round(np.arange(0, 1.001, 0.01), 2)
ABC = [A, B, C]

param_dir = Path('~/scratch/params').expanduser()

# Number of bootstrap repetitions; the loop index doubles as the RNG seed.
N_BOOTSTRAPS = 10

# One record per repetition; the frame is built once after the loop instead
# of writing into a preallocated, uninitialised array.
records = []

for i in range(N_BOOTSTRAPS):
    np.random.seed(i)

    rmse_no_flag = xr.DataArray(np.full((len(A), len(B), len(C)), np.nan),
                            coords=(A, B, C), dims=('A','B','C'))

    for a, b, c in product(*ABC):
        # lidar = []
        # spicy = []
        # for loc_dir in param_dir.glob('*'):
        #     res = xr.open_dataset(param_dir.joinpath(f'{loc_dir.name}/{a}_{b}_{c}.nc'))
        #     sd_actual, sd_pred = get_bootstrap(res['lidar-sd'], res['snow_depth'])
        #     lidar.extend(sd_actual)
        #     spicy.extend(sd_pred)

        # NOTE(review): placeholder. The grid is filled with random numbers
        # while the real RMSE computation above is commented out, so the
        # "best" parameters recorded below are not meaningful yet.
        rmse_no_flag.loc[a,b,c] = np.random.random() #calc_rmse(lidar, spicy)

    # Locate the minimum by index over all three dims. Unlike
    # `where(== min).squeeze()`, this always yields exactly one cell,
    # even when several cells tie for the minimum.
    best_idx = rmse_no_flag.argmin(dim = ['A', 'B', 'C'])
    best = rmse_no_flag.isel(best_idx)
    records.append({
        'a': float(best['A']),
        'b': float(best['B']),
        'c': float(best['C']),
        'rmse': float(best),
    })

df = pd.DataFrame(records, columns = ['a', 'b', 'c', 'rmse'])


  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 5555/5555 [00:00<00:00, 6797.72it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6829.80it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6855.89it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6823.18it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6855.44it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6817.25it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6813.11it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6711.08it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6734.02it/s]
100%|██████████| 5555/5555 [00:00<00:00, 6773.66it/s]


In [4]:
# Write the per-seed best-parameter table to CSV.
# NOTE(review): absolute, user-specific path; presumably only valid on the
# original author's machine. Confirm before re-running elsewhere.
pdf_csv_path = '/bsuhome/zacharykeskinen/spicy-snow/scripts/optimize/param_pdf/pdf_v1.csv'
df.to_csv(pdf_csv_path)