# Batch SnowRadar Processing Example
A simple workflow using multiple CPUs and landmask/QA filtering

In [1]:
# Community imports
import pandas as pd
from glob import glob
import os
from pathlib import Path
import geopandas as gpd
import sys
sys.path.append('/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/pySnowRadar')


# pySnowRadar imports
from pySnowRadar import SnowRadar
from pySnowRadar.qc import error_check
from pySnowRadar.processing import geo_filter,geo_filter_insitu_sites, batch_process
from pySnowRadar.algorithms import Wavelet_TN, Peakiness
from pySnowRadar.processing import extract_layers

from tqdm import tqdm
from scipy.signal import find_peaks
from datetime import datetime, timedelta
from thefuzz import process, fuzz
from scipy.spatial.distance import cdist
from tqdm import tqdm
import h5py
import numpy as np

### BASIN-SCALE

In [None]:
# Basin-scale run: pick layers on every geo-filtered echogram that has not
# been processed yet and dump the results to `output_path`.
input_sr_data_path = '/Volumes/PortableSSD/OIB_2016/no19th/*_deconv.nc'
output_path = '/Volumes/PortableSSD/OIB_2016/Peakiness_v1'
ATM_folder  = '/Volumes/PortableSSD/ATM_levelled_classified_v2'

# True  -> reuse the geo-filtered file list stored in <output_path>/<log_name>.
# False -> run geo_filter on every file matching the glob and rewrite that log.
load_filtered = True
log_name = 'log.csv'
log_path = os.path.join(output_path, log_name)

if load_filtered:
    insitu_site_filtered = pd.read_csv(log_path, index_col=0).values.flatten()
else:
    input_sr_data = glob(input_sr_data_path)
    insitu_site_filtered = geo_filter(input_sr_data)
    # The log is written into the output folder, which may not exist yet.
    os.makedirs(output_path, exist_ok=True)
    pd.Series(insitu_site_filtered).to_csv(log_path)

# Files already present in the output folder are mapped back to their input
# path so they can be removed from the work list. The input directory is taken
# from the glob pattern so the two can never drift apart.
# NOTE(review): this assumes output files keep the input file name — confirm.
input_dir = os.path.dirname(input_sr_data_path)
done_files = np.array([
    os.path.join(input_dir, name)
    for name in os.listdir(output_path)
    if name.endswith('.nc') and name.startswith('I')
])
print(len(insitu_site_filtered))
insitu_site_filtered = np.setdiff1d(insitu_site_filtered, done_files)
print(len(insitu_site_filtered))

print('Geo-filtering done')

# Generate error codes for SR data
# sr_data = [SnowRadar(sr, 'full') for sr in insitu_site_filtered]
# error_codes = [pd.Series(error_check(sr).tolist()) for sr in sr_data]

workers = 8
# picker = Wavelet_TN
picker = Peakiness

# FOR WAVELET_TN
# params = {
#         'snow_density': 0.3,
#         'ref_snow_layer': 0.5,
#         'cwt_precision': 10
#         }

# FOR PEAKINESS
params = {
    'snow_density': 0.3,
    'log_peak_threshold': 0.4,
    'lin_peak_threshold': 0.4,
    'pp_r_threshold': 30,
    'pp_l_threshold': 30,
}

res = batch_process(
    insitu_site_filtered, picker, params, workers,
    dump_results=True,
    overwrite=False,
    path=output_path,
    atm_folder=ATM_folder,
)

### ECCC 

In [2]:
def get_snow_density(path, sites):
    """Return the mean snow density of each requested site.

    Every file in ``path`` whose name ends with ``csv`` is read with
    ``pd.read_csv(..., index_col=0)``. The site label is the second
    underscore-separated token of the file name; all files sharing a label
    are pooled before averaging.

    Parameters
    ----------
    path : str
        Directory holding the CSV files.
    sites : iterable
        Site labels to report. Each must match the label of at least one file.

    Returns
    -------
    dict
        Maps each requested site to the NaN-ignoring mean of its ``density``
        column divided by 1000.

    Raises
    ------
    KeyError
        If a requested site has no matching CSV file, or a pooled table has
        no ``density`` column.
    """
    # Collect the tables per site first and concatenate once per site, rather
    # than re-concatenating on every file.
    frames_by_site = {}
    for fname in sorted(os.listdir(path)):
        if not fname.endswith('csv'):
            continue
        tokens = fname.split('_')
        if len(tokens) < 2:
            # No site token in the name, so the file cannot be attributed.
            continue
        frames_by_site.setdefault(tokens[1], []).append(
            pd.read_csv(os.path.join(path, fname), index_col=0)
        )

    snow_densities = {}
    for site in sites:
        if site not in frames_by_site:
            raise KeyError(f"no snow density CSV found for site {site!r} in {path!r}")
        pooled = pd.concat(frames_by_site[site])
        # presumably converts kg m^-3 to g cm^-3 — TODO confirm units
        snow_densities[site] = np.nanmean(pooled['density']) / 1000
    return snow_densities

In [None]:
# WAVELET
# Run the Wavelet_TN picker on the echograms of one Eureka campaign and dump
# the results into one sub-folder per site.

year = '2014'
today = datetime.today().strftime('%Y%m%d')
path_to_shapes = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/Eureka/grid_extents_v4/'

if year == '2014':
    campaign = 'EUREKA2014'
    input_sr_data_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20140325/*/*_deconv.nc'
    output_path = f'/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Wavelet/{today}_transect/'
    ATM_folder = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/ATM/2014/ATM_levelled_classified'
    sites = [2, 3, 4]
    snow_densities = {2: 0.3, 3: 0.3, 4: 0.3, 'transect': 0.3}

elif year == '2016':
    campaign = 'EUREKA2016'
    input_sr_data_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20160419/*/*_deconv.nc'
    output_path = f'/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Wavelet/{today}_SRprecision/'
    # NOTE(review): ATM_folder is not assigned in this branch, so a 2016 run
    # uses whatever an earlier cell left in the kernel (NameError on a fresh
    # one). Earlier candidate, kept for reference — confirm before enabling:
    # ATM_folder = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/ATM/2016'
    snow_pit_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/Eureka/SnowPits/20250126'
    sites = ['grid3', 'grid4', 'grid5', 'grid6', 'grid7', 'grid8']
    # get_snow_density only returns the sites passed in, so there is no
    # 'transect' entry here; loop over `sites` below when using this branch.
    snow_densities = get_snow_density(snow_pit_path, sites)

else:
    # Fail here rather than with a NameError further down.
    raise ValueError(f"Unsupported year {year!r}; expected '2014' or '2016'")

workers = 8
picker = Wavelet_TN

# The glob does not depend on the site, so expand it once.
input_sr_data = glob(input_sr_data_path)

for site in tqdm(['transect']):
# for site in tqdm(sites):

    print(site)
    # insitu_site_filtered = geo_filter(input_sr_data)
    insitu_site_filtered = geo_filter_insitu_sites(path_to_shapes, year, site, input_sr_data)
    print('Geo-filtering done')

    # The QA error codes were computed here but never passed on, at the cost
    # of loading every echogram in 'full' mode. Disabled, as in the
    # basin-scale cell; re-enable if the codes are needed:
    # sr_data = [SnowRadar(sr, 'full') for sr in insitu_site_filtered]
    # error_codes = [pd.Series(error_check(sr).tolist()) for sr in sr_data]
    print(len(insitu_site_filtered))

    # FOR WAVELET_TN
    params = {
        'snow_density': snow_densities[site],
        'ref_snow_layer': 0.5,
        'cwt_precision': 10,
    }

    res = batch_process(
        insitu_site_filtered, picker, params, workers,
        dump_results=True,
        overwrite=True,
        path=os.path.join(output_path, str(site)),
        atm_folder=ATM_folder,
    )

In [None]:
# PEAKINESS
# Run the Peakiness picker on the echograms of one Eureka campaign and dump
# the results into one sub-folder per site.
picker = Peakiness

year = '2014'
today = datetime.today().strftime('%Y%m%d')
path_to_shapes = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/Eureka/grid_extents_v4/'

if year == '2014':
    campaign = 'EUREKA2014'
    input_sr_data_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20140325/*/*_deconv.nc'
    output_path = f'/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Peakiness/{today}_transect/'
    ATM_folder = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/ATM/2014/ATM_levelled_classified'
    sites = [2, 3, 4]
    snow_densities = {2: 0.3, 3: 0.3, 4: 0.3, 'transect': 0.3}

elif year == '2016':
    campaign = 'EUREKA2016'
    input_sr_data_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20160419/*/*_deconv.nc'
    output_path = f'/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Peakiness/{today}_SRprecision/'
    # NOTE(review): ATM_folder is not assigned in this branch, so a 2016 run
    # uses whatever an earlier cell left in the kernel (NameError on a fresh
    # one). Earlier candidate, kept for reference — confirm before enabling:
    # ATM_folder = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/ATM/2016'
    snow_pit_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/Eureka/SnowPits/20250126'
    sites = ['grid3', 'grid4', 'grid5', 'grid6', 'grid7', 'grid8']
    # get_snow_density only returns the sites passed in, so there is no
    # 'transect' entry here; loop over `sites` below when using this branch.
    snow_densities = get_snow_density(snow_pit_path, sites)

else:
    # Fail here rather than with a NameError further down.
    raise ValueError(f"Unsupported year {year!r}; expected '2014' or '2016'")

workers = 8

# The glob does not depend on the site, so expand it once.
input_sr_data = glob(input_sr_data_path)

for site in tqdm(['transect']):
# for site in tqdm(sites):

    print(site)
    # insitu_site_filtered = geo_filter(input_sr_data)
    insitu_site_filtered = geo_filter_insitu_sites(path_to_shapes, year, site, input_sr_data)
    print('Geo-filtering done')

    # The QA error codes were computed here but never passed on, at the cost
    # of loading every echogram in 'full' mode. Disabled, as in the
    # basin-scale cell; re-enable if the codes are needed:
    # sr_data = [SnowRadar(sr, 'full') for sr in insitu_site_filtered]
    # error_codes = [pd.Series(error_check(sr).tolist()) for sr in sr_data]
    print(len(insitu_site_filtered))

    # FOR PEAKINESS
    params = {
        'snow_density': snow_densities[site],
        'log_peak_threshold': 0.4,
        'lin_peak_threshold': 0.4,
        'pp_r_threshold': 30,
        'pp_l_threshold': 30,
    }

    res = batch_process(
        insitu_site_filtered, picker, params, workers,
        dump_results=True,
        overwrite=True,
        path=os.path.join(output_path, str(site)),
        atm_folder=ATM_folder,
    )

In [12]:
# PEAKINESS
# Run the Peakiness picker per in-situ site with a fixed snow density of 0.3
# and without ATM data (atm_folder=None).
year = '2016'
today = datetime.today().strftime('%Y%m%d')
path_to_shapes = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/Eureka/grid_extents_v3/'

# ATM_folder is assigned for reference only; this cell passes atm_folder=None.
if year == '2014':
    campaign = 'EUREKA2014'
    input_sr_data_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20140325/*/*_deconv.nc'
    output_path = f'/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Peakiness/{today}/'
    ATM_folder = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/ATM/2014'
    sites = [2, 3, 4]

elif year == '2016':
    campaign = 'EUREKA2016'
    input_sr_data_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20160419/*/*_deconv.nc'
    output_path = f'/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Peakiness/{today}/'
    ATM_folder = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/ATM/2016'
    sites = ['grid3', 'grid4', 'grid5', 'grid6', 'grid7', 'grid8']

else:
    # Fail here rather than with a NameError further down.
    raise ValueError(f"Unsupported year {year!r}; expected '2014' or '2016'")

workers = 8
picker = Peakiness

# FOR PEAKINESS — identical for every site, so built once.
params = {
    'snow_density': 0.3,
    'log_peak_threshold': 0.4,
    'lin_peak_threshold': 0.4,
    'pp_r_threshold': 30,
    'pp_l_threshold': 30,
}

# The glob does not depend on the site, so expand it once.
input_sr_data = glob(input_sr_data_path)

for site in tqdm(sites):
    print(site)
    # insitu_site_filtered = geo_filter(input_sr_data)
    insitu_site_filtered = geo_filter_insitu_sites(path_to_shapes, year, site, input_sr_data)
    print('Geo-filtering done')

    # The QA error codes were computed here but never passed on, at the cost
    # of loading every echogram in 'full' mode. Disabled, as in the
    # basin-scale cell; re-enable if the codes are needed:
    # sr_data = [SnowRadar(sr, 'full') for sr in insitu_site_filtered]
    # error_codes = [pd.Series(error_check(sr).tolist()) for sr in sr_data]

    res = batch_process(
        insitu_site_filtered, picker, params, workers,
        dump_results=True,
        overwrite=True,
        path=os.path.join(output_path, str(site)),
        atm_folder=None,
    )
        

  0%|          | 0/6 [00:00<?, ?it/s]

grid3
Geo-filtering done


 17%|█▋        | 1/6 [00:32<02:40, 32.07s/it]

grid4
Geo-filtering done


 33%|███▎      | 2/6 [01:06<02:14, 33.60s/it]

grid5
Geo-filtering done


 50%|█████     | 3/6 [01:36<01:35, 31.96s/it]

grid6
Geo-filtering done


 67%|██████▋   | 4/6 [02:13<01:07, 33.72s/it]

grid7
Geo-filtering done


 83%|████████▎ | 5/6 [02:46<00:33, 33.71s/it]

grid8
Geo-filtering done


100%|██████████| 6/6 [03:18<00:00, 33.02s/it]


### WAVELET SENSITIVITY ANALYSIS
Runs the Wavelet_TN picker for every site over a range of `ref_snow_layer` values.

In [25]:
# WAVELET
# Sensitivity run: for every site, repeat the Wavelet_TN picker over a range
# of ref_snow_layer values, writing to <output_path>/<site>/<ref_snow_layer>.

year = '2016'
today = datetime.today().strftime('%Y%m%d')
path_to_shapes = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/Eureka/grid_extents_v4/'

# ATM_folder is assigned for reference only; this cell passes atm_folder=None.
if year == '2014':
    campaign = 'EUREKA2014'
    input_sr_data_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20140325/*/*_deconv.nc'
    output_path = '/Volumes/PortableSSD/Wavelet_sensitivity'
    ATM_folder = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/ATM/2014'
    sites = [2, 3, 4]
    snow_densities = {2: 0.3, 3: 0.3, 4: 0.3}

elif year == '2016':
    campaign = 'EUREKA2016'
    input_sr_data_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20160419/*/*_deconv.nc'
    output_path = '/Volumes/PortableSSD/Wavelet_sensitivity'
    ATM_folder = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/ATM/2016'
    snow_pit_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/Eureka/SnowPits/20250126'
    sites = ['grid3', 'grid4', 'grid5', 'grid6', 'grid7', 'grid8']
    snow_densities = get_snow_density(snow_pit_path, sites)

else:
    # Fail here rather than with a NameError further down.
    raise ValueError(f"Unsupported year {year!r}; expected '2014' or '2016'")

workers = 4
picker = Wavelet_TN

# The glob does not depend on the site, so expand it once.
input_sr_data = glob(input_sr_data_path)

for site in tqdm(sites):
    print(site)
    # insitu_site_filtered = geo_filter(input_sr_data)
    insitu_site_filtered = geo_filter_insitu_sites(path_to_shapes, year, site, input_sr_data)
    print('Geo-filtering done')

    # The QA error codes were computed here but never passed on, at the cost
    # of loading every echogram in 'full' mode. Disabled, as in the
    # basin-scale cell; re-enable if the codes are needed:
    # sr_data = [SnowRadar(sr, 'full') for sr in insitu_site_filtered]
    # error_codes = [pd.Series(error_check(sr).tolist()) for sr in sr_data]
    print(len(insitu_site_filtered))

    for ref_snow_layer in [0.2, 0.3, 0.4, 0.5, 0.75, 1, 1.5]:

        # FOR WAVELET_TN
        params = {
            'snow_density': snow_densities[site],
            'ref_snow_layer': ref_snow_layer,
            'cwt_precision': 10,
        }

        res = batch_process(
            insitu_site_filtered, picker, params, workers,
            dump_results=True,
            overwrite=True,
            path=os.path.join(output_path, str(site), str(ref_snow_layer)),
            atm_folder=None,
        )

  0%|          | 0/6 [00:00<?, ?it/s]

grid3
Geo-filtering done
8


 17%|█▋        | 1/6 [03:57<19:46, 237.36s/it]

grid4
Geo-filtering done
11


 33%|███▎      | 2/6 [08:40<17:37, 264.43s/it]

grid5
Geo-filtering done
3


 50%|█████     | 3/6 [10:35<09:47, 195.87s/it]

grid6
Geo-filtering done
20


 67%|██████▋   | 4/6 [19:26<10:56, 328.24s/it]

grid7
Geo-filtering done
16


 83%|████████▎ | 5/6 [26:30<06:02, 362.72s/it]

grid8
Geo-filtering done
7


100%|██████████| 6/6 [30:26<00:00, 304.37s/it]


### PEAKINESS SENSITIVITY ANALYSIS

In [9]:
# Set-up for the Peakiness tuning run: restrict the 2016 echograms to those
# whose file name also appears under the Wavelet/20250112 folder, then
# geo-filter them once per site.
outer_path = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Peakiness_tuning'
path_to_shapes = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/Eureka/grid_extents_v4/'
year = '2016'

# Every file (at any depth) below the reference Wavelet run.
file_list = []
for root, dirs, files in os.walk('/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Wavelet/20250112'):
    for file in files:
        file_list.append(os.path.join(root, file))
sr_files = [os.path.basename(x) for x in file_list if x.endswith('.nc')]

# Set lookup keeps the name matching below linear in the number of echograms.
sr_file_names = set(sr_files)
input_sr_data = glob('/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Echograms/20160419/*/*_deconv.nc')
input_sr_data = [x for x in input_sr_data if os.path.basename(x) in sr_file_names]


sites = ['grid3', 'grid4', 'grid5', 'grid6', 'grid7', 'grid8']
# One geo-filtered result per site, keyed by site label.
insitu_site_filtered = {
    site: geo_filter_insitu_sites(path_to_shapes, year, site, input_sr_data)
    for site in sites
}

# Generate error codes for SR data
# sr_data = [SnowRadar(sr, 'full') for sr in insitu_site_filtered]
# error_codes = [pd.Series(error_check(sr).tolist()) for sr in sr_data]

print('starting computation')


starting computation


In [None]:

# Grid search over the four Peakiness thresholds. Each site/parameter
# combination is written to <tuning_root>/<site>/<log>_<lin>_<pp_r>_<pp_l>.
# Relies on `sites` and the per-site dict `insitu_site_filtered` from the
# previous cell.
tuning_root = '/Users/torka/Library/CloudStorage/OneDrive-Personal/MarineSciences/MasterThs-T/Data/OIB/Peakiness_tuning'
workers = 8
picker = Peakiness

for site in sites:
    outer_path = os.path.join(tuning_root, site)
    # parents=True so the first run works even if the tuning root is missing.
    Path(outer_path).mkdir(parents=True, exist_ok=True)

    for log_peak_threshold in [0.4, 0.5, 0.6, 0.7, 0.8]:
        for lin_peak_threshold in [0.2, 0.3, 0.4, 0.5]:
            for pp_r_threshold in [20, 25, 30, 35]:
                for pp_l_threshold in [20, 25, 30, 35]:

                    folder_name = f'{log_peak_threshold}_{lin_peak_threshold}_{pp_r_threshold}_{pp_l_threshold}'
                    inner_path = os.path.join(outer_path, folder_name)
                    Path(inner_path).mkdir(parents=True, exist_ok=True)

                    params = {
                        'snow_density': 0.3,
                        'log_peak_threshold': log_peak_threshold,
                        'lin_peak_threshold': lin_peak_threshold,
                        'pp_r_threshold': pp_r_threshold,
                        'pp_l_threshold': pp_l_threshold,
                    }

                    res = batch_process(
                        insitu_site_filtered[site],
                        picker,
                        params,
                        workers,
                        dump_results=True,
                        overwrite=True,
                        path=inner_path,
                        atm_folder=None,
                    )

        # Printed once per log_peak_threshold value, after its sub-grid is done.
        print('log done')