# A testing ground for pyPIPS functionality

In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import numpy.ma as ma
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.ticker as ticker
import matplotlib.dates as dates
from mpl_toolkits.axes_grid1 import ImageGrid,make_axes_locatable,host_subplot
#from mpl_toolkits.basemap import Basemap
from datetime import datetime, timedelta
import sys
import os
import pyPIPS.utils as utils
import pyPIPS.thermolib as thermo
import pyPIPS.DSDlib as dsd
#import pyPIPS.disdrometer_module as dis
import pyPIPS.plotmodule as PIPSplot
#import pyPIPS.simulator as sim
import pyPIPS.pips_io as pipsio
import pyPIPS.PIPS as pips
import pyPIPS.parsivel_params as pp
import pyPIPS.parsivel_qc as pqc
import pyPIPS.polarimetric as dualpol
#from pyCRMtools.modules import plotmodule as plotmod
from pyCRMtools.modules import utils as CRMutils
from pyCRMtools.pycaps import arps_read
from pyCRMtools.pycaps import pycaps_fields
from pyCRMtools.pycaps import calvars_radar as radar
import pandas as pd
import xarray as xr
import glob
import numpy.random as random
from scipy.stats import gamma, uniform
from scipy.special import gamma as gammafunc
from scipy import ndimage
from metpy.plots import StationPlot
from metpy.calc import wind_components
from metpy.cbook import get_test_data
from metpy.plots import StationPlot
from metpy.plots.wx_symbols import current_weather, sky_cover
from metpy.units import units
import warnings
warnings.simplefilter('ignore')
%matplotlib inline

In [None]:
# Parsivel bin definitions taken from pyPIPS.parsivel_params; the helper functions below read
# the fall-speed arrays as module-level globals.
# Diameter bins (the key names indicate min / max / average values in mm).
min_diameter, max_diameter, avg_diameter = (
    pp.parsivel_parameters[key]
    for key in ('min_diameter_bins_mm', 'max_diameter_bins_mm', 'avg_diameter_bins_mm')
)
bin_width = max_diameter - min_diameter
# Fall-speed bins (the key names indicate min / max / average values in m/s).
min_fall_bins, max_fall_bins, avg_fall_bins = (
    pp.parsivel_parameters[key]
    for key in ('min_fallspeed_bins_mps', 'max_fallspeed_bins_mps', 'avg_fallspeed_bins_mps')
)
# Every minimum value followed by the last maximum value, i.e. one entry more than min_fall_bins.
fall_bins_edges = np.append(min_fall_bins, max_fall_bins[-1])


def remove_unneeded(ds):
    """Return ``ds`` with the 'fields_KHTX', 'fields_KGWX' and 'fields' dimensions dropped.

    The work is delegated to ``ds.drop_dims(..., errors='ignore')``, so any of these
    dimensions that is absent from ``ds`` is skipped rather than raising.
    """
    return ds.drop_dims(['fields_KHTX', 'fields_KGWX', 'fields'], errors='ignore')


def reindex_velocity_bins(VD_matrix, interval):
    """Resample the fall-speed axis of a velocity-diameter matrix onto regularly spaced bins.

    The counts in each original fall-speed bin are divided by that bin's width, the resulting
    densities are looked up on a regular grid of bins of width ``interval`` (each new bin takes
    the value of the last original bin whose lower edge is at or below the new bin's lower
    edge), and the densities are multiplied by ``interval`` to turn them back into counts.
    The new bins may therefore hold fractional counts.

    Parameters
    ----------
    VD_matrix : xarray object (the notebook passes a DataArray)
        Must have a ``fallspeed_bin`` dimension with ``min_fallspeeds`` and ``max_fallspeeds``
        coordinates along it.
    interval : float
        Width of the new fall-speed bins, in the units of those coordinates.

    Returns
    -------
    Same kind of object as ``VD_matrix``, with ``fallspeed_bin`` indexed by the new bin centres
    and with ``fallspeed``, ``min_fallspeeds`` and ``max_fallspeeds`` coordinates for the new bins.
    """
    # Regular bin edges spanning the full original fall-speed range.
    # NOTE(review): np.arange with a fractional step can yield one edge more or fewer than
    # expected because of floating-point rounding -- confirm the edge count for other intervals.
    lowest_edge = VD_matrix.coords['min_fallspeeds'][0]
    highest_edge = VD_matrix.coords['max_fallspeeds'][-1]
    regular_edges = np.arange(lowest_edge, highest_edge+interval, interval)
    regular_lower = regular_edges[:-1]
    regular_upper = regular_edges[1:]
    regular_centers = 0.5 * (regular_lower + regular_upper)

    # Counts per unit fall speed in the original (irregular) bins.
    original_widths = VD_matrix['max_fallspeeds'] - VD_matrix['min_fallspeeds']
    density = VD_matrix / original_widths
    # Index by the lower bin edge so that the forward-fill ('pad') lookup below assigns every new
    # bin the density of the original bin that contains it.
    density = density.set_index(fallspeed_bin='min_fallspeeds')
    density_regular = density.reindex({'fallspeed_bin': regular_lower}, method='pad')

    # Density times the new bin width gives counts again; the original comment states that the
    # total number of drops is preserved by this procedure.
    rebinned = density_regular * interval

    # Switch the index from lower edges to bin centres, then (re-)attach the descriptive coordinates.
    rebinned.coords['fallspeed'] = ('fallspeed_bin', regular_centers)
    rebinned = rebinned.set_index(fallspeed_bin='fallspeed')
    for coord_name, coord_values in (('fallspeed', regular_centers),
                                     ('min_fallspeeds', regular_lower),
                                     ('max_fallspeeds', regular_upper)):
        rebinned.coords[coord_name] = ('fallspeed_bin', coord_values)
    # rebinned.coords['diameter_bin'] = ('diameter_bin', rebinned['diameter'])
    return rebinned


def calc_mean_velocity(vd_matrix_rebinned):
    """Return the count-weighted mean fall speed for each slice along the last dimension.

    The input is transposed (all dimensions reversed) and iterated along its new first
    dimension; the per-slice means are concatenated along a dimension named ``diameter_bin``.
    NOTE(review): this assumes ``diameter_bin`` is the last dimension of the input -- confirm
    for callers that pass a different dimension order.

    Missing counts are treated as zero weight. The velocities come from the ``fallspeed``
    coordinate and the average is taken over ``fallspeed_bin``.
    """
    def _weighted_mean(counts):
        # NaN is not usable as a weight, so empty bins contribute zero.
        drop_counts = counts.fillna(0)
        return counts.coords['fallspeed'].weighted(drop_counts).mean('fallspeed_bin')

    per_slice_means = [_weighted_mean(counts) for counts in vd_matrix_rebinned.transpose()]
    return xr.concat(per_slice_means, dim='diameter_bin')

# TODO: Fix this for multiple times!
def shift_mean_velocity(vd_matrix_rebinned, vt_rain):
    """Shift each fall-speed spectrum so that its mean moves toward ``vt_rain``.

    For every diameter bin (and every time, when a ``time`` dimension is present) the spectrum
    along ``fallspeed_bin`` is shifted by a whole number of bins,
    ``int((vt_rain - mean_velocity) / bin_spacing)`` (truncated toward zero), where
    ``mean_velocity`` comes from ``calc_mean_velocity``. Where that difference is not finite
    the spectrum is left unshifted.

    Parameters
    ----------
    vd_matrix_rebinned : xarray.DataArray
        Counts with ``fallspeed_bin`` and ``diameter_bin`` dimensions and, optionally, ``time``.
        The bin spacing is taken from the first two ``fallspeed_bin`` values, so the axis is
        assumed to be regularly spaced (e.g. the output of ``reindex_velocity_bins``).
    vt_rain : xarray.DataArray
        Target fall speed. It is grouped over the same dimension(s) as the counts and paired
        with them positionally, so it must yield the same groups in the same order.

    Returns
    -------
    xarray.DataArray
        A new array holding the shifted spectra; the array passed in is not modified.
        NOTE(review): when ``time`` is present the result has gone through stack/unstack, so its
        dimension order may differ from the input's -- confirm if the order matters downstream.

    Notes
    -----
    ``DataArray.shift`` fills the vacated bins with NaN and discards values moved past either
    end of the axis, so counts are not necessarily conserved.
    """
    # We have to iterate over both time and diameter dimensions, because the shift needed is a
    # function of both.
    vel_interval = vd_matrix_rebinned['fallspeed_bin'][1] - vd_matrix_rebinned['fallspeed_bin'][0].values
    mean_vel = calc_mean_velocity(vd_matrix_rebinned)
    time_flag = 'time' in vd_matrix_rebinned.dims
    if time_flag:
        # Collapse (time, diameter_bin) into one dimension so a single groupby visits every pair.
        vt_rain = vt_rain.stack(stack_dim=['time', 'diameter_bin'])
        mean_vel = mean_vel.stack(stack_dim=['time', 'diameter_bin'])
        vd_matrix_rebinned = vd_matrix_rebinned.stack(stack_dim=['time', 'diameter_bin'])
        groupby_dims = 'stack_dim'
    else:
        groupby_dims = 'diameter_bin'
    vt_d_groups = vt_rain.groupby(groupby_dims, squeeze=False)
    mean_vel_d_groups = mean_vel.groupby(groupby_dims, squeeze=False)
    vel_da_groups = vd_matrix_rebinned.groupby(groupby_dims, squeeze=False)
    if time_flag:
        vd_matrix_rebinned = vd_matrix_rebinned.unstack('stack_dim')
    # The .loc assignments below write in place. Work on a copy so that they can never reach
    # the caller's array (directly in the no-time case, or through shared memory after
    # stack/unstack in the time case).
    vd_matrix_rebinned = vd_matrix_rebinned.copy()
    # Each groupby item is a (label, data) pair; only the data part is needed here.
    for (_, vt_d), (_, mean_vel_d), (_, vel_da) in zip(list(vt_d_groups), list(mean_vel_d_groups),
                                                      list(vel_da_groups)):
        vt_diff = vt_d - mean_vel_d
        if np.isfinite(vt_diff.values):
            # The groups keep a length-1 dimension (squeeze=False); .item() extracts the single
            # value explicitly because int() on a 1-D array is deprecated since NumPy 1.25.
            vt_shift = int((vt_diff / vel_interval).values.item())
        else:
            vt_shift = 0
        vel_da = vel_da.shift(fallspeed_bin=vt_shift)
        if time_flag:
            vel_da = vel_da.unstack('stack_dim')
            vd_matrix_rebinned.loc[dict(time=vel_da['time'].values,
                                        diameter_bin=vel_da['diameter_bin'].values)] = vel_da
        else:
            vd_matrix_rebinned.loc[dict(diameter_bin=vel_da['diameter_bin'])] = vel_da
    return vd_matrix_rebinned


def rebin_to_parsivel(vd_matrix_rebinned):
    """Sum a finely binned velocity-diameter matrix back into the Parsivel fall-speed bins.

    The ``fallspeed_bin`` values are grouped into the bins delimited by the module-level
    ``fall_bins_edges`` and summed within each group. The grouped dimension is then renamed
    back to ``fallspeed_bin``, re-labelled with ``avg_fall_bins``, and given the
    ``min_fallspeeds`` / ``max_fallspeeds`` / ``fallspeed`` coordinates of the Parsivel bins.

    The result is returned with dimensions ordered (time, fallspeed_bin, diameter_bin), so
    the input must have a ``time`` dimension.
    """
    binned_sum = vd_matrix_rebinned.groupby_bins('fallspeed_bin', fall_bins_edges).sum()
    # groupby_bins names its output dimension 'fallspeed_bin_bins'; restore the original name
    # and label the bins by their average fall speed. The original author notes that this is
    # clunky but works.
    relabelled = (
        binned_sum
        .swap_dims({'fallspeed_bin_bins': 'fallspeed_bin'})
        .rename({'fallspeed_bin_bins': 'fallspeed_bin'})
        .reindex({'fallspeed_bin': avg_fall_bins})
    )
    for coord_name, coord_values in (('min_fallspeeds', min_fall_bins),
                                     ('max_fallspeeds', max_fall_bins),
                                     ('fallspeed', avg_fall_bins)):
        relabelled.coords[coord_name] = ('fallspeed_bin', coord_values)
    # Put the dimensions back into their original order.
    return relabelled.transpose('time', 'fallspeed_bin', 'diameter_bin')

In [None]:
from natsort import natsorted
from pprint import pprint


# Location of the combined PIPS netCDF files. This is a machine-specific absolute path and has
# to be adjusted when the notebook is run elsewhere.
PIPS_dir = '/Volumes/scr_fast/Projects/VORTEXSE/obsdata/full_PIPS_dataset/'
parsivel_filename = os.path.join(PIPS_dir, 'parsivel_combined_IOP4B_D1_2016_PIPS1B_60s.nc')
#
# Read this single file into an xarray Dataset; the cells below all work on parsivel_ds.
parsivel_ds = xr.load_dataset(parsivel_filename) 
# parsivel_filenames = glob.glob(PIPS_dir + '/parsivel_combined*60s*nc')
# parsivel_ds_read = xr.open_mfdataset(parsivel_filenames, combine='nested', concat_dim="time", 
#                                      data_vars='minimal', coords='minimal', compat='override', parallel=True)
#parsivel_filenames = glob.glob(PIPS_dir + '/parsivel_combined*IOP4B_D1_2016*60s*nc')
#parsivel_filenames = natsorted(parsivel_filenames)
# total_DSDs = 0
# for parsivel_filename in parsivel_filenames:
#     parsivel_ds = xr.open_dataset(parsivel_filename)
#     vd_matrix_qc = parsivel_ds['VD_matrix_qc']
#     pcount2 = vd_matrix_qc.sum(dim=['fallspeed_bin', 'diameter_bin'])
#     pcount = parsivel_ds['pcount']
#     fig, ax = plt.subplots()
#     pcount.plot(ax=ax)
#     pcount2.plot(ax=ax)
# #     ND = parsivel_ds['ND_qc']
# #     ND = ND.where(ND > 0.)
# #     ND = ND.dropna(dim='time', how='all')
# #     num_DSDs = ND.sizes['time']
# #     total_DSDs = total_DSDs + num_DSDs
# #     print("Number of non-zero DSDs: {:d}".format(num_DSDs))
#     parsivel_ds.close()
    
# print("Total number of non-zero DSDs: {:d}".format(total_DSDs))

# parsivel_filename = os.path.join(PIPS_dir, 'ND_combined_full_dataset_60s.nc')
# parsivel_ds = xr.open_dataset(parsivel_filename)

#parsivel_ds = xr.open_dataset(parsivel_filenames[0])
#print(len(parsivel_filenames))
#pprint([os.path.basename(f) for f in parsivel_filenames])
#parsivel_ds = xr.open_mfdataset(parsivel_filenames, combine='nested', concat_dim='time', preprocess=pipsio.remove_unneeded) #, combine='nested', concat_dim='time')
# ND = parsivel_ds['ND_qc']
# ND = ND.where(ND > 0.)
# ND = ND.dropna(dim='time', how='all')
# total_DSDs = ND.sizes['time']
# print("Total number of non-zero DSDs: {:d}".format(total_DSDs))
#pprint(parsivel_filenames[34:])
#parsivel_filename = 'parsivel_combined_FMCW_2017_043017_PIPS2A_60s.nc'
#PIPS_dir = '/Users/dawson29/sshfs_mounts/depot/data/Projects/TriPIPS/2019'
#parsivel_filename = 'parsivel_combined_TriPIPS_092719_TriPIPS_10s.nc'
#parsivel_filepath = os.path.join(PIPS_dir, parsivel_filename)
#parsivel_ds = xr.open_dataset(parsivel_filepath)

In [None]:
# Turn the 'fallspeed' and 'diameter' coordinates into index coordinates named after their
# dimensions. xarray can only select by label on index coordinates, so without this step the
# matrix could not be indexed by fall speed or diameter value.
parsivel_ds.coords['fallspeed_bin'] = ('fallspeed_bin', parsivel_ds.coords['fallspeed'])
parsivel_ds.coords['diameter_bin'] = ('diameter_bin', parsivel_ds.coords['diameter'])
# Apply QC for margin fallers at least (currently disabled)
# parsivel_ds['VD_matrix_marginQC'] = pqc.marginQC(parsivel_ds['VD_matrix'])
# Show the dataset summary to check the new coordinates.
print(parsivel_ds)

In [None]:
# Re-read the Parsivel bin definitions (same keys as in the setup cell) and echo the fall-speed
# bins, their edges and their widths as a quick sanity check.
min_diameter, max_diameter, avg_diameter = (
    pp.parsivel_parameters[key]
    for key in ('min_diameter_bins_mm', 'max_diameter_bins_mm', 'avg_diameter_bins_mm')
)
bin_width = max_diameter - min_diameter
min_fall_bins, max_fall_bins, avg_fall_bins = (
    pp.parsivel_parameters[key]
    for key in ('min_fallspeed_bins_mps', 'max_fallspeed_bins_mps', 'avg_fallspeed_bins_mps')
)
fall_bins_edges = np.append(min_fall_bins, max_fall_bins[-1])
print(min_fall_bins, max_fall_bins, fall_bins_edges, max_fall_bins - min_fall_bins, sep='\n')

vd_matrix_da = parsivel_ds['VD_matrix']
# Counts summed over all times, with empty bins masked. The plot below does not use this array;
# it shows the single time nearest to time_to_plot.
vd_matrix_da_full = vd_matrix_da.sum(dim='time')
vd_matrix_da_full = vd_matrix_da_full.where(vd_matrix_da_full > 0.)
time_to_plot = '2016-04-29T22:00:00'
# print("Plotting full-deployment v-d matrix for {} and {}".format(parsivel_ds.deployment_name,
#                                                                  parsivel_ds.probe_name))

# Axis limits, colour-bar limits and bin definitions handed to the plotting routine.
axdict = {
    'min_diameter': min_diameter,
    'avg_diameter': avg_diameter,
    'min_fall_bins': min_fall_bins,
    'xlim': [0.0, 10.0],
    'ylim': [0.0, 15.0],
    'PIPS_name': parsivel_ds.probe_name,
    'cblim': (1, 200),
}
PSDdict = {
    'vd_matrix_da': vd_matrix_da.sel(time=time_to_plot, method='nearest'),
    # NOTE(review): this is the length of the whole record although a single time is plotted --
    # confirm that plot_vel_D expects this value.
    'DSD_interval': len(vd_matrix_da['time']) * parsivel_ds.DSD_interval,
    # FIXME
    # 'flaggedtime': parsivel_df['flaggedtimes'].values[t]
}
fig, ax = PIPSplot.plot_vel_D(
    axdict,
    PSDdict,
    parsivel_ds['rho'].sel(time=time_to_plot, method='nearest'),
)
# image_name =  \
#     '{}_{}_vel_D_{}_full.png'.format(PIPS_name, deployment_name, tag)
# image_path = os.path.join(vel_D_image_dir, image_name)
# fig.savefig(image_path, dpi=200, bbox_inches='tight')


In [None]:
# Width of the regular fall-speed bins onto which the matrix is resampled (same units as the
# fall-speed coordinates).
interval = 0.1
# print(vd_matrix_da_full)
# Resample every time step of the raw matrix onto the regular fall-speed grid.
vd_matrix_da_rebinned = reindex_velocity_bins(vd_matrix_da, interval)
print(vd_matrix_da_rebinned)

In [None]:
# Plot settings for the regularly re-binned matrix: the fall-speed bin edges now come from the
# re-binned array itself.
axdict = {
    'min_diameter': min_diameter,
    'avg_diameter': avg_diameter,
    'min_fall_bins': vd_matrix_da_rebinned.coords['min_fallspeeds'],
    'xlim': [0.0, 10.0],
    'ylim': [0.0, 15.0],
    'PIPS_name': parsivel_ds.probe_name,
    'cblim': (1, 50),
}

# vd_matrix_da_full_rebinned = vd_matrix_da_full_rebinned.where(vd_matrix_da_full_rebinned > 0.)
# NOTE(review): the message says "full-deployment", but the matrix plotted below is the single
# time nearest to time_to_plot.
print(
    "Plotting full-deployment v-d matrix for {} and {}".format(
        parsivel_ds.deployment_name, parsivel_ds.probe_name
    )
)
PSDdict = {
    'vd_matrix_da': vd_matrix_da_rebinned.sel(time=time_to_plot, method='nearest'),
    'DSD_interval': len(vd_matrix_da['time']) * parsivel_ds.DSD_interval,
    # FIXME
    # 'flaggedtime': parsivel_df['flaggedtimes'].values[t]
}
fig, ax = PIPSplot.plot_vel_D(
    axdict,
    PSDdict,
    parsivel_ds['rho'].sel(time=time_to_plot, method='nearest'),
)
#ax.plot(avg_diameter, mean_vel)

In [None]:
# Check that counts are the same: sum over fall speed at the selected time, before and after
# re-binning, and draw both per-diameter totals (the two plot calls are issued back to back so
# the curves can be compared).
counts_per_diam_orig = vd_matrix_da.sel(time=time_to_plot, method='nearest').sum('fallspeed_bin')
counts_per_diam_rebinned = vd_matrix_da_rebinned.sel(time=time_to_plot, method='nearest').sum('fallspeed_bin')
counts_per_diam_orig.plot()
counts_per_diam_rebinned.plot()

In [None]:
# Calculate rain terminal velocity as a function of time and diameter, using the dataset's 'rho'
# variable for the density correction (correct_rho=True).
vt_rain = pips.calc_empirical_fallspeed(vd_matrix_da_rebinned.coords['diameter'], correct_rho=True,
                                        rho=parsivel_ds['rho'])
# Wrap the result in a DataArray labelled with the time and diameter coordinates of the
# re-binned matrix, so that it can be grouped the same way in shift_mean_velocity.
# NOTE(review): assumes calc_empirical_fallspeed returns values shaped (time, diameter) -- confirm.
vt_rain_da = xr.DataArray(vt_rain, 
                          coords={
                              'time': ('time', vd_matrix_da_rebinned.coords['time']),
                              'diameter_bin': ('diameter_bin', vd_matrix_da_rebinned.coords['diameter'])
                                 },
                          dims=['time', 'diameter_bin'])

In [None]:
# Inspect the re-binned counts, the target fall speeds and the dimension order of the counts.
print(vd_matrix_da_rebinned)
print(vt_rain_da)
print(vd_matrix_da_rebinned.dims)

In [None]:
# Count-weighted mean fall speed of the re-binned matrix, computed by calc_mean_velocity.
mean_vel = calc_mean_velocity(vd_matrix_da_rebinned)

In [None]:
# Quick-look plot of the mean fall speed, followed by the notebook's rich display of the array.
mean_vel.plot()
mean_vel

In [None]:
# Shift the fall-speed spectra of the re-binned matrix toward the rain fall speed in vt_rain_da.
vd_matrix_da_rebinned_shifted = shift_mean_velocity(vd_matrix_da_rebinned, vt_rain_da)

In [None]:
# Inspect the shifted matrix (dimensions and coordinates).
print(vd_matrix_da_rebinned_shifted)

In [None]:
# Plot settings for the shifted, regularly binned matrix; the fall-speed bin edges are read
# from the shifted array.
axdict = {
    'min_diameter': min_diameter,
    'avg_diameter': avg_diameter,
    'min_fall_bins': vd_matrix_da_rebinned_shifted.coords['min_fallspeeds'],
    'xlim': [0.0, 10.0],
    'ylim': [0.0, 15.0],
    'PIPS_name': parsivel_ds.probe_name,
    'cblim': (1, 50),
}

# vd_matrix_da_full_rebinned = vd_matrix_da_full_rebinned.where(vd_matrix_da_full_rebinned > 0.)
# NOTE(review): the message says "full-deployment", but the matrix plotted below is the single
# time nearest to time_to_plot.
print(
    "Plotting full-deployment v-d matrix for {} and {}".format(
        parsivel_ds.deployment_name, parsivel_ds.probe_name
    )
)
PSDdict = {
    'vd_matrix_da': vd_matrix_da_rebinned_shifted.sel(time=time_to_plot, method='nearest'),
    'DSD_interval': len(vd_matrix_da['time']) * parsivel_ds.DSD_interval,
    # FIXME
    # 'flaggedtime': parsivel_df['flaggedtimes'].values[t]
}
fig, ax = PIPSplot.plot_vel_D(
    axdict,
    PSDdict,
    parsivel_ds['rho'].sel(time=time_to_plot, method='nearest'),
)
#ax.plot(avg_diameter, mean_vel)

In [None]:
# Sum the shifted, regularly binned counts back into the Parsivel fall-speed bins and inspect
# the result.
vd_matrix_da_shifted = rebin_to_parsivel(vd_matrix_da_rebinned_shifted)
print(vd_matrix_da_shifted)

In [None]:
# Plot settings for the shifted matrix after it has been returned to the Parsivel fall-speed bins.
axdict = {
    'min_diameter': min_diameter,
    'avg_diameter': avg_diameter,
    'min_fall_bins': vd_matrix_da_shifted.coords['min_fallspeeds'],
    'xlim': [0.0, 10.0],
    'ylim': [0.0, 15.0],
    'PIPS_name': parsivel_ds.probe_name,
    'cblim': (1, 200),
}

# Mask bins without drops (values that are not > 0 become NaN). The masked array replaces
# vd_matrix_da_shifted, so later cells see the masked version as well.
vd_matrix_da_shifted = vd_matrix_da_shifted.where(vd_matrix_da_shifted > 0.)
# NOTE(review): the message says "full-deployment", but the matrix plotted below is the single
# time nearest to time_to_plot.
print(
    "Plotting full-deployment v-d matrix for {} and {}".format(
        parsivel_ds.deployment_name, parsivel_ds.probe_name
    )
)
PSDdict = {
    'vd_matrix_da': vd_matrix_da_shifted.sel(time=time_to_plot, method='nearest'),
    'DSD_interval': len(vd_matrix_da['time']) * parsivel_ds.DSD_interval,
    # FIXME
    # 'flaggedtime': parsivel_df['flaggedtimes'].values[t]
}
fig, ax = PIPSplot.plot_vel_D(
    axdict,
    PSDdict,
    parsivel_ds['rho'].sel(time=time_to_plot, method='nearest'),
)

In [None]:
# Apply some QC love: run the shifted matrix through the pyPIPS.parsivel_qc filters one after
# another, each step taking the previous step's output.
# NOTE(review): what each filter removes is defined in pyPIPS.parsivel_qc, not here; the order
# below is kept exactly as written in case it matters.
print(vd_matrix_da_shifted)
vd_matrix_qc_shifted_da = pqc.strongwindQC(vd_matrix_da_shifted)
vd_matrix_qc_shifted_da = pqc.splashingQC(vd_matrix_qc_shifted_da)
vd_matrix_qc_shifted_da = pqc.marginQC(vd_matrix_qc_shifted_da)
# The fall-speed mask is built from the Parsivel bin-average diameters and fall speeds and is
# then used by the rain fall-speed filter.
fallspeedmask = pqc.get_fallspeed_mask(avg_diameter, avg_fall_bins)
vd_matrix_qc_shifted_da = pqc.rainfallspeedQC(vd_matrix_qc_shifted_da, fallspeedmask)
vd_matrix_qc_shifted_da = pqc.rainonlyQC(vd_matrix_qc_shifted_da)

In [None]:
# Plot settings for the shifted matrix after quality control.
axdict = {
    'min_diameter': min_diameter,
    'avg_diameter': avg_diameter,
    'min_fall_bins': vd_matrix_qc_shifted_da.coords['min_fallspeeds'],
    'xlim': [0.0, 10.0],
    'ylim': [0.0, 15.0],
    'PIPS_name': parsivel_ds.probe_name,
    'cblim': (1, 200),
}

# Mask bins without drops (values that are not > 0 become NaN); the masked array replaces
# vd_matrix_qc_shifted_da for the following cells as well.
vd_matrix_qc_shifted_da = vd_matrix_qc_shifted_da.where(vd_matrix_qc_shifted_da > 0.)
# NOTE(review): the message says "full-deployment", but the matrix plotted below is the single
# time nearest to time_to_plot.
print(
    "Plotting full-deployment v-d matrix for {} and {}".format(
        parsivel_ds.deployment_name, parsivel_ds.probe_name
    )
)
PSDdict = {
    'vd_matrix_da': vd_matrix_qc_shifted_da.sel(time=time_to_plot, method='nearest'),
    'DSD_interval': len(vd_matrix_qc_shifted_da['time']) * parsivel_ds.DSD_interval,
    # FIXME
    # 'flaggedtime': parsivel_df['flaggedtimes'].values[t]
}
fig, ax = PIPSplot.plot_vel_D(
    axdict,
    PSDdict,
    parsivel_ds['rho'].sel(time=time_to_plot, method='nearest'),
)

In [None]:
# Compute ND from the shifted VD matrix.
# The fall-speed spectrum is evaluated on the Parsivel bin averages, with the density correction
# enabled and the dataset's 'rho' variable as the density.
fallspeed_spectrum = pips.calc_fallspeed_spectrum(avg_diameter, avg_fall_bins, correct_rho=True,
                                                  rho=parsivel_ds['rho'])

# Mask bins without drops in both the un-QC'd and the QC'd shifted matrices.
# NOTE(review): vd_matrix_shifted is not used in the cells visible here; only the QC'd matrix
# goes into calc_ND.
vd_matrix_shifted = vd_matrix_da_shifted.where(vd_matrix_da_shifted > 0.)
vd_matrix_shifted_qc = vd_matrix_qc_shifted_da.where(vd_matrix_qc_shifted_da > 0.)
ND_RB15_vshift = pips.calc_ND(vd_matrix_shifted_qc, fallspeed_spectrum, parsivel_ds.DSD_interval)

In [None]:
 # According to RB15, should use internal parsivel rainrate to categorize
# Rain rate used for the categorisation: the dataset's 'precipintensity' variable.
rainrate = parsivel_ds['precipintensity']
# True where the rain rate lies in [first RB15 minimum, last RB15 maximum).
# NOTE(review): RR_ind is not used in the cells visible here.
RR_ind = (rainrate < pp.RB15_RR_max[-1]) & (rainrate >= pp.RB15_RR_min[0])



# Start from a copy of the velocity-shifted ND; a later cell overwrites parts of ND_RB15 while
# ND_RB15_vshift stays unchanged.
ND_RB15 = ND_RB15_vshift.copy()

In [None]:
# Rain-rate class edges: every RB15 class minimum followed by the last class maximum.
RR_edges = np.append(pp.RB15_RR_min, pp.RB15_RR_max[-1])
# RR_edges = pp.RB15_RR_min
print(RR_edges)
# Group the dataset by rain-rate class. Bins are closed on the left (right=False) and each group
# is labelled with the minimum rain rate of its class.
parsivel_ds_RR_groups = parsivel_ds.groupby_bins('precipintensity', RR_edges, right=False, labels=pp.RB15_RR_min)
print(parsivel_ds_RR_groups)
# Open question from the author: why aren't the group labels sorted in the printed output?

In [None]:
# Apply the RB15 correction factor of each rain-rate class to the ND values of that class.
# group_indices maps each group label to the positions of the group's members.
group_indices = parsivel_ds_RR_groups.groups
for idx, group in enumerate(parsivel_ds_RR_groups):
    # Earlier version, which looked the correction factor up by position instead of by label:
#     group_idx = group_indices[group[0]]
#     ND_temp = ND_RB15_vshift[group_idx]
#     RB15_correction_factor = pp.RB15_correction_factors[idx]
#     ND_temp2 = RB15_correction_factor * ND_temp
#     ND_temp2 = ND_temp2.transpose()
#     ND_RB15[group_idx] = ND_temp2
    # group is a (label, data) pair; group[0] is the rain-rate label of the class.
    group_idx = group_indices[group[0]]
    # Positional selection along the first dimension of ND_RB15_vshift.
    # NOTE(review): presumably that dimension is time -- confirm.
    ND_temp = ND_RB15_vshift[group_idx]
    # Select the correction factor by rain-rate label rather than by loop position.
    RB15_correction_factor = pp.RB15_correction_factors.sel(rainrate=group[0])
    ND_temp2 = RB15_correction_factor * ND_temp
    # NOTE(review): the transpose presumably restores the dimension order of ND_RB15 after the
    # multiplication -- confirm.
    ND_temp2 = ND_temp2.transpose()    
    ND_RB15[group_idx] = ND_temp2
    
    # Debug output for each rain-rate class.
    print(group_indices[group[0]])
    print(idx, group[0])
    print(RB15_correction_factor)
    print(ND_temp)
    print(ND_temp2)