In [None]:
# This notebook plots PIPS data from the netCDF files dumped by the real-time script and,
# if necessary, merges them with the netCDF files produced from the onboard card data.
# IMPORTANT: It should be run right after running PIPS_to_nc.py
# TODO: make this into a stand-alone command-line driven script like the others
%matplotlib notebook
import os
import pandas as pd
import xarray as xr
import numpy as np
import time
import glob
import matplotlib.pyplot as plt
from pyPIPS import thermolib as thermo
from pyPIPS import timemodule as tm
import pyPIPS.PIPS as pips
from datetime import datetime, timedelta
import matplotlib.dates as dates
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid, make_axes_locatable, host_subplot
import pyPIPS.plotmodule as pm
from pyPIPS.PIPS import avg_diameter, avg_fall_bins, max_diameter, \
    min_diameter, min_fall_bins, diameter_edges, fall_bins_edges
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Function definitions. 

def get_files(file_list, starttime, endtime, ftype='onesec'):
    """Select the files whose embedded time stamps best bracket a requested period.

    Each file name is expected to carry a 14-character start stamp (``%Y%m%d%H%M%S``)
    beginning at character ``8 + len(ftype)``, followed by one separator character and
    a 14-character end stamp in the same format.

    Parameters
    ----------
    file_list : list of str
        Candidate file names (base names, not full paths).
    starttime, endtime : datetime.datetime
        Start and end of the requested period.
    ftype : str, optional
        File-type token embedded in the name. Only its length is used here, to locate
        the time stamps.

    Returns
    -------
    list of str
        Files sorted by start stamp, running from the file whose start stamp is nearest
        ``starttime`` through the file whose end stamp is nearest ``endtime`` (inclusive).
        Empty if no file name contains any calendar day of the period.
    """
    len_prefix = 8 + len(ftype)
    start_slice = slice(len_prefix, len_prefix + 14)
    end_slice = slice(len_prefix + 15, len_prefix + 29)
    stamp_fmt = '%Y%m%d%H%M%S'

    # Count *calendar* days touched by the period. Using (endtime - starttime).days would
    # give 0 for a window shorter than 24 h that crosses midnight, and the end day's files
    # would then be filtered out below.
    n_days = (endtime.date() - starttime.date()).days + 1
    day_str = [(starttime + timedelta(days=i)).strftime("%Y%m%d") for i in range(n_days)]

    # Keep only files whose name mentions one of those days (cheap substring pre-filter)
    file_list = [file_name for file_name in file_list
                 if any(day in file_name for day in day_str)]

    if file_list:
        # Sort by start stamp, then pick the files nearest to the requested start and end
        sorted_files = sorted(file_list,
                              key=lambda f: datetime.strptime(f[start_slice], stamp_fmt))
        starttimes = [datetime.strptime(f[start_slice], stamp_fmt) for f in sorted_files]
        endtimes = [datetime.strptime(f[end_slice], stamp_fmt) for f in sorted_files]
        # min over (distance, index) tuples returns the index of the nearest stamp
        _, idx1 = min((abs(val - starttime), idx) for (idx, val) in enumerate(starttimes))
        _, idx2 = min((abs(val - endtime), idx) for (idx, val) in enumerate(endtimes))
        file_list = sorted_files[idx1:idx2 + 1]

    return file_list


In [None]:
# Set up dictionaries to control plotting parameters.
# The *_ax_params dictionaries are handed to pm.set_meteogram_axes and log_ND_params to
# pm.plotmeteogram in the plotting cells below.

dateformat = '%H:%M'


def _time_axis_params():
    """Return the x-axis settings shared by the time-series plots.

    Major ticks every 15 minutes, minor ticks every 5 minutes, labels formatted with
    ``dateformat``. Fresh locator/formatter objects are built on every call so that no
    instance is shared between parameter dictionaries.
    """
    return {
        'majorxlocator': dates.MinuteLocator(byminute=[0, 15, 30, 45], interval=1),
        'majorxformatter': dates.DateFormatter(dateformat),
        'minorxlocator': dates.MinuteLocator(byminute=list(range(0, 60, 5)), interval=1),
    }


# Temperature and dewpoint
temp_dewp_ax_params = {
    **_time_axis_params(),
    'axeslimits': [None, (-5., 35.)],
    'axeslabels': ['Time (H:M) UTC', r'Temperature ($^{\circ}$C)']
}

# Wind speed and direction
windspd_ax_params = {
    **_time_axis_params(),
    'axeslimits': [None, [0.0, 25.0]],
    'axeslabels': ['Time (H:M) UTC', r'wind speed (m s$^{-1}$)']
}

winddir_ax_params = {
    'majorylocator': ticker.MultipleLocator(45.),
    'axeslimits': [None, [0.0, 360.0]],
    # Direction is an angle: plain degrees, not degrees Celsius
    'axeslabels': [None, r'Wind direction ($^{\circ}$)']
}

# Pressure
pressure_ax_params = {
    'majorylocator': ticker.MultipleLocator(5.),
    'axeslimits': [None, [940., 980.]],
    'axeslabels': [None, r'Pressure (hPa)']
}

# Number concentration (field settings: pcolor plot with fixed colour limits)
log_ND_params = {
    'type': 'pcolor',
    'vlimits': [-1.0, 3.0],
    'clabel': r'log[N ($m^{-3} mm^{-1}$)]'
}

# Number concentration (axis settings: time on x, diameter on y)
log_ND_ax_params = {
    **_time_axis_params(),
    'axeslimits': [None, [0.0, 9.0]],
    'majorylocator': ticker.MultipleLocator(base=0.25),
    'axeslabels': [None, 'D (mm)']
}

In [None]:
# Set up directories for reading the data and plotting
# NOTE(review): the base paths below are absolute and machine-specific; edit them before
# running this notebook anywhere else.
# base_output_dir = '/Users/dawson29/sshfs_mounts/depot/data/Projects/TriPIPS/webdata/'
deployment_name = '022723_mass_test' # 'IOP2_030323'

# Real-time netCDF dumps live in <base_realtime_input_dir>/<deployment_name>
base_realtime_input_dir = '/Users/dawson29/Dropbox/Projects/PERiLS/obsdata/2023/2023_realtime'
realtime_netcdf_input_dir = os.path.join(base_realtime_input_dir, deployment_name)

# Card-derived netCDF files are read from .../netcdf; merged output goes to .../netcdf_rt_merged
base_card_input_dir = '/Users/dawson29/Dropbox/Projects/PERiLS/obsdata/2023/'
card_netcdf_input_dir = os.path.join(base_card_input_dir, deployment_name, 'netcdf')
card_netcdf_output_dir = os.path.join(base_card_input_dir, deployment_name, 'netcdf_rt_merged')
# exist_ok=True creates the directory only when needed, without a separate existence check
os.makedirs(card_netcdf_output_dir, exist_ok=True)

plot_output_dir = os.path.join(base_realtime_input_dir, 'plots')
os.makedirs(plot_output_dir, exist_ok=True)


In [None]:
# Choose the PIPS and the time window, then collect the matching real-time netCDF files

PIPS_name = 'PIPS3B'

starttime_stamp = '20230227030000'
endtime_stamp = '20230227213000'

starttime_dt, endtime_dt = (datetime.strptime(stamp, tm.timefmt3)
                            for stamp in (starttime_stamp, endtime_stamp))

# Every netCDF file for this PIPS in the real-time directory (full paths, then base names)
file_path_list_onePIPS = glob.glob(realtime_netcdf_input_dir + f'/*{PIPS_name}*nc')
file_list_onePIPS = list(map(os.path.basename, file_path_list_onePIPS))


def _files_in_window(ftype):
    """Return (file names, full paths) of the ``ftype`` files selected by ``get_files``.

    Files whose name contains 'current' are excluded before the time-window selection.
    """
    names = [name for name in file_list_onePIPS
             if ftype in name and 'current' not in name]
    names = get_files(names, starttime_dt, endtime_dt, ftype=ftype)
    paths = [os.path.join(realtime_netcdf_input_dir, name) for name in names]
    return names, paths


file_list_onesec, file_path_list_onesec = _files_in_window('onesec')
file_list_ND, file_path_list_ND = _files_in_window('ND')
file_list_spectrum, file_path_list_spectrum = _files_in_window('spectrum')
file_list_telegram, file_path_list_telegram = _files_in_window('telegram')

# datetime_stamp = '20220505210000'
# ND_file = 'ND_{}.nc'.format(datetime_stamp)
# ND_path = os.path.join(netcdf_input_dir, ND_file)
# ND_ds = xr.load_dataset(ND_path)

# conv_file = 'onesec_{}.nc'.format(datetime_stamp)
# conv_path = os.path.join(netcdf_input_dir, conv_file)
# conv_ds = xr.load_dataset(conv_path)


In [None]:
# Now read in the real-time netCDF files. Each file type is concatenated along its own
# record dimension, duplicate time stamps are dropped, and dimensions/variables are renamed
# (presumably to match the names used in the card-derived datasets -- TODO confirm).

# One-second conventional data: record dimension 'logger_datetime' becomes 'time'
onesec_rt_ds = xr.open_mfdataset(file_path_list_onesec, combine='nested', concat_dim='logger_datetime',
                                 decode_timedelta=False)
onesec_rt_ds = onesec_rt_ds.rename({'logger_datetime': 'time'})
onesec_rt_ds = onesec_rt_ds.drop_duplicates('time')

# Number concentration: dimension 'diameter' becomes 'diameter_bin'
ND_rt_ds = xr.open_mfdataset(file_path_list_ND, combine='nested', concat_dim='time',
                             decode_timedelta=False)
ND_rt_ds = ND_rt_ds.drop_duplicates('time')
ND_rt_ds = ND_rt_ds.rename_dims({'diameter': 'diameter_bin'})

# Velocity-diameter spectrum: both bin dimensions are renamed, then 'velocity' itself
# is renamed to 'fallspeed'
spectrum_rt_ds = xr.open_mfdataset(file_path_list_spectrum, combine='nested', concat_dim='time',
                                   decode_timedelta=False)
spectrum_rt_ds = spectrum_rt_ds.drop_duplicates('time')
spectrum_rt_ds = spectrum_rt_ds.rename_dims({'diameter': 'diameter_bin', 'velocity': 'fallspeed_bin'})
spectrum_rt_ds = spectrum_rt_ds.rename({'velocity': 'fallspeed'})

# Telegram data: record dimension 'index' becomes 'time'
telegram_rt_ds = xr.open_mfdataset(file_path_list_telegram, combine='nested', concat_dim='index',
                                   decode_timedelta=False)
telegram_rt_ds = telegram_rt_ds.rename({'index': 'time'})
telegram_rt_ds = telegram_rt_ds.drop_duplicates('time')

In [None]:
# Read the netCDF files derived from the card data for this deployment and PIPS:
# the 10-s combined Parsivel file and the raw one-second conventional file.
parsivel_combined_card_filename = f'parsivel_combined_{deployment_name}_{PIPS_name}_10s.nc'
parsivel_combined_card_path = os.path.join(card_netcdf_input_dir, parsivel_combined_card_filename)

onesec_card_filename = f'conventional_raw_{deployment_name}_{PIPS_name}.nc'
onesec_card_path = os.path.join(card_netcdf_input_dir, onesec_card_filename)

# decode_timedelta=False matches how the real-time files are opened above
parsivel_combined_card_ds = xr.open_dataset(parsivel_combined_card_path, decode_timedelta=False)
onesec_card_ds = xr.open_dataset(onesec_card_path, decode_timedelta=False)

In [None]:
# Correct the times from the real-time files to match the GPS times. This is already done
# for the times from the card-derived netCDF files.

# Get the first record in the real-time one-second data whose GPS status is 'A'
# NOTE(review): isel(time=0) fails if no record has GPS_status == 'A' -- confirm that at
# least one good fix is always present.

first_good_GPS_time = onesec_rt_ds.where(onesec_rt_ds['GPS_status'].compute() == 'A', drop=True).isel(time=0)

# Logger time stamp of that record as a Python datetime
logger_datetime = first_good_GPS_time['time'].values
logger_datetime = pd.to_datetime(logger_datetime).to_pydatetime()

GPS_date = str(first_good_GPS_time['GPS_date'].values)
GPS_time = str(first_good_GPS_time['GPS_time'].values)

# Next, construct a datetime object from the GPS info.
# The slicing below reads GPS_date as DDMMYY (with the century fixed to '20') and
# GPS_time as HHMMSS.
gyear = int('20' + GPS_date[4:])
gmonth = int(GPS_date[2:4])
gday = int(GPS_date[:2])
ghour = int(GPS_time[:2])
gmin = int(GPS_time[2:4])
gsec = int(GPS_time[4:6])

GPS_datetime = datetime(gyear, gmonth, gday, ghour, gmin, gsec)
# Offset to add to logger times to obtain GPS times
GPS_offset = GPS_datetime - logger_datetime
print('GPS time: {}, Logger time: {}'.format(GPS_datetime.ctime(),
                                             logger_datetime.ctime()))
print('GPS Offset: {}'.format(str(GPS_offset)))

# print(onesec_card_ds.where(onesec_card_ds['GPS_status'] == 'A', drop=True).isel(time=0))

In [None]:
# Shift the real-time one-second time coordinate by the GPS offset.
# NOTE: onesec_rt_ds is rebound from itself, so re-running this cell applies the offset again.
old_times = pd.to_datetime(onesec_rt_ds['time']).to_pydatetime()
new_times = old_times + GPS_offset
onesec_rt_ds = onesec_rt_ds.assign_coords({'time': new_times})

In [None]:
# Apply the same GPS offset to the time coordinate of the ND, spectrum and telegram
# real-time datasets.
# NOTE: each dataset is rebound from itself, so re-running this cell applies the offset again.
old_ND_times = pd.to_datetime(ND_rt_ds['time']).to_pydatetime()
new_times = old_ND_times + GPS_offset
ND_rt_ds = ND_rt_ds.assign_coords({'time': new_times})

old_spectrum_times = pd.to_datetime(spectrum_rt_ds['time']).to_pydatetime()
new_times = old_spectrum_times + GPS_offset
spectrum_rt_ds = spectrum_rt_ds.assign_coords({'time': new_times})

old_telegram_times = pd.to_datetime(telegram_rt_ds['time']).to_pydatetime()
new_times = old_telegram_times + GPS_offset
telegram_rt_ds = telegram_rt_ds.assign_coords({'time': new_times})

In [None]:
# Now, figure out what times are missing from the data for each source. Start with the one-sec data.
# First find the earliest start and latest end times across the card and real-time datasets
onesec_card_starttime = onesec_card_ds['time'][0].values
onesec_card_endtime = onesec_card_ds['time'][-1].values
onesec_rt_starttime = onesec_rt_ds['time'][0].values
onesec_rt_endtime = onesec_rt_ds['time'][-1].values

starttime_onesec = min(onesec_card_starttime, onesec_rt_starttime)
endtime_onesec = max(onesec_card_endtime, onesec_rt_endtime)

print(starttime_onesec, endtime_onesec)

# Then, create a new index of all times at 1-s intervals between the two.
# NOTE(review): the grid is anchored at starttime_onesec; the membership tests and reindex
# calls below match time labels exactly -- confirm both sources sit on whole seconds.
all_onesec_times = xr.date_range(starttime_onesec, endtime_onesec, freq='1S')
print(all_onesec_times)
print(all_onesec_times[0], all_onesec_times[-1])

In [None]:
# Inspect the (GPS-corrected) time index of the real-time one-second data
print(onesec_rt_ds.indexes['time'])

In [None]:
# time_diffs_onesec_card = np.diff(onesec_ds['time'].values)
# time_diffs_onesec_rt = np.diff(onesec_card_ds['time'].values)

# fig, ax = plt.subplots()

# ax.plot(time_diffs_onesec_card)
# ax.plot(time_diffs_onesec_rt)

In [None]:
# [time for time in onesec_rt_ds.indexes['time']]

In [None]:
# Flag every expected 1-s time that is absent from each source (True = missing).
# Index.isin performs the exact-label membership test in one vectorized call instead of
# looping over every time stamp in Python; the result is a boolean NumPy array.
missing_times_rt = ~all_onesec_times.isin(onesec_rt_ds.indexes['time'])

missing_times_card = ~all_onesec_times.isin(onesec_card_ds.indexes['time'])

# Times that neither source can supply
missing_times_both = missing_times_rt & missing_times_card

In [None]:
# Show where each one-second source has gaps (1 = time missing from that source)
fig, ax = plt.subplots()

ax.plot(all_onesec_times, missing_times_rt, label='Real time data')
ax.plot(all_onesec_times, missing_times_card, label='Card data')
# Label the figure so it can be read on its own
ax.set(xlabel='Time (UTC)', ylabel='Missing (1 = yes)',
       title='Missing one-second times by source')
# Trailing semicolon keeps the Legend repr out of the cell output
ax.legend(loc='best');

In [None]:
# Show the one-second times that are missing from both sources (1 = missing from both)
fig, ax = plt.subplots()

ax.plot(all_onesec_times, missing_times_both, label='Both datasets')
# Label the figure so it can be read on its own
ax.set(xlabel='Time (UTC)', ylabel='Missing (1 = yes)',
       title='One-second times missing from both sources')
# Trailing semicolon keeps the Legend repr out of the cell output
ax.legend(loc='best');

In [None]:
# It turns out that the way the dewpoint is computed for the real-time data is wrong: it uses
# the fasttemp when it should use the slowtemp. Fix that here, and also rederive the RH
# accordingly.

# Note, this uses a different formula from the one in pyPIPS_merge.py that computes dewpoint for the card data
# but it should be close enough. TODO: go back and make this consistent.
# Inputs are scaled before the call (pressure * 100, temperature + 273.15, RH / 100) and
# 273.15 is subtracted from the result -- presumably hPa -> Pa, degC -> K and % -> fraction
# on the way in, K -> degC on the way out; confirm against thermo.calTdfromRH.
dewpoint = (thermo.calTdfromRH(onesec_rt_ds['pressure'] * 100.,
                               onesec_rt_ds['slowtemp'] + 273.15,
                               onesec_rt_ds['RH'] / 100.) - 273.15)

# Derived RH (%): ratio of the exponential term evaluated at the dewpoint to the same term
# evaluated at the fast temperature
fasttemp = onesec_rt_ds['fasttemp']
RH_derived = 100. * (np.exp((17.625 * dewpoint) / (243.04 + dewpoint)) /
                     np.exp((17.625 * fasttemp) / (243.04 + fasttemp)))

# Store both results on the real-time dataset (overwrites 'dewpoint' if already present)
onesec_rt_ds['dewpoint'] = dewpoint
onesec_rt_ds['RH_derived'] = RH_derived

In [None]:
# Now, reindex both one-sec datasets onto the full set of 1-s times. Times that a source
# does not have are filled with missing values; these copies are used for the plots below.
onesec_rt_full_ds = onesec_rt_ds.reindex({'time': all_onesec_times})
onesec_card_full_ds = onesec_card_ds.reindex({'time': all_onesec_times})

In [None]:
# Now attempt to merge the data from the two sources to fill in the (non-mutual) gaps.
# combine_first gives priority to the real-time values and falls back to the card values.
# Note that the original (not reindexed) datasets are merged here.
onesec_merged_ds = onesec_rt_ds.combine_first(onesec_card_ds)

In [None]:
# Now, reindex the merged dataset onto the full set of 1-s times, so that times missing
# from both sources appear as missing values
onesec_merged_full_ds = onesec_merged_ds.reindex({'time': all_onesec_times})

In [None]:
# Recompute some thermodynamic parameters for the newly merged onesec data
# (the dataset is replaced by whatever pips.calc_thermo returns)
onesec_merged_full_ds = pips.calc_thermo(onesec_merged_full_ds)

In [None]:
# Make some plots of the one-sec data. Start with T/Td
# Real-time data, reindexed onto the full 1-s time axis

fig_t_td, ax_t_td = plt.subplots(figsize=(10, 6))
plottimes_onesec = [all_onesec_times.to_pydatetime()]
# Temperature and Dewpoint
# Plot range: from the dewpoint minimum up to the slow-temperature maximum (NaNs ignored)
Tmin = np.nanmin(onesec_rt_full_ds['dewpoint'].values)
Tmax = np.nanmax(onesec_rt_full_ds['slowtemp'].values)
fields_to_plot_onesec = [onesec_rt_full_ds['slowtemp'].values, onesec_rt_full_ds['dewpoint'].values]
# Copy the module-level defaults so that setting 'plotmin' does not alter them
temp_params = pm.temp_params.copy()
dewpoint_params = pm.dewpoint_params.copy()
temp_params['plotmin'] = Tmin - 5.0
dewpoint_params['plotmin'] = Tmin - 5.0
field_parameters_onesec = [temp_params, dewpoint_params]
ax_t_td = pm.plotmeteogram(
    ax_t_td,
    plottimes_onesec,
    fields_to_plot_onesec,
    field_parameters_onesec)
# Axis limits: full time span, data range padded by 5 on either side.
# NOTE: this updates the shared temp_dewp_ax_params dictionary in place.
temp_dewp_ax_params['axeslimits'] = [[plottimes_onesec[0][0], plottimes_onesec[0][-1]],
                                     [Tmin - 5.0, Tmax + 5.0]]
ax_t_td, = pm.set_meteogram_axes([ax_t_td], [temp_dewp_ax_params])

In [None]:
# T/Td plot of the one-sec card data, reindexed onto the full 1-s time axis

fig_t_td, ax_t_td = plt.subplots(figsize=(10, 6))
plottimes_onesec = [all_onesec_times.to_pydatetime()]
# Temperature and Dewpoint
# Plot range: from the dewpoint minimum up to the slow-temperature maximum (NaNs ignored)
Tmin = np.nanmin(onesec_card_full_ds['dewpoint'].values)
Tmax = np.nanmax(onesec_card_full_ds['slowtemp'].values)
fields_to_plot_onesec = [onesec_card_full_ds['slowtemp'].values, onesec_card_full_ds['dewpoint'].values]
# Copy the module-level defaults so that setting 'plotmin' does not alter them
temp_params = pm.temp_params.copy()
dewpoint_params = pm.dewpoint_params.copy()
temp_params['plotmin'] = Tmin - 5.0
dewpoint_params['plotmin'] = Tmin - 5.0
field_parameters_onesec = [temp_params, dewpoint_params]
ax_t_td = pm.plotmeteogram(
    ax_t_td,
    plottimes_onesec,
    fields_to_plot_onesec,
    field_parameters_onesec)

# Time limits default to the full span; use the commented values to zoom in on a period
# tmin = datetime(2023, 3, 3, 8, 40, 0)
# tmax = datetime(2023, 3, 3, 8, 50, 0)
tmin = plottimes_onesec[0][0]
tmax = plottimes_onesec[0][-1]

# NOTE: this updates the shared temp_dewp_ax_params dictionary in place
temp_dewp_ax_params['axeslimits'] = [[tmin, tmax],
                                     [Tmin - 5.0, Tmax + 5.0]]
ax_t_td, = pm.set_meteogram_axes([ax_t_td], [temp_dewp_ax_params])

In [None]:
# Card data on its own time axis, i.e. without rows for the missing times
# (so the plotted line presumably runs straight across any gaps)

fig_t_td, ax_t_td = plt.subplots(figsize=(10, 6))
plottimes_onesec = [onesec_card_ds['time'].to_index().to_pydatetime()]
# Temperature and Dewpoint
# Plot range: from the dewpoint minimum up to the slow-temperature maximum (NaNs ignored)
Tmin = np.nanmin(onesec_card_ds['dewpoint'].values)
Tmax = np.nanmax(onesec_card_ds['slowtemp'].values)
fields_to_plot_onesec = [onesec_card_ds['slowtemp'].values, onesec_card_ds['dewpoint'].values]
# Copy the module-level defaults so that setting 'plotmin' does not alter them
temp_params = pm.temp_params.copy()
dewpoint_params = pm.dewpoint_params.copy()
temp_params['plotmin'] = Tmin - 5.0
dewpoint_params['plotmin'] = Tmin - 5.0
field_parameters_onesec = [temp_params, dewpoint_params]
ax_t_td = pm.plotmeteogram(
    ax_t_td,
    plottimes_onesec,
    fields_to_plot_onesec,
    field_parameters_onesec)

# Time limits default to the full span; use the commented values to zoom in on a period
# tmin = datetime(2023, 3, 3, 8, 40, 0)
# tmax = datetime(2023, 3, 3, 8, 50, 0)
tmin = plottimes_onesec[0][0]
tmax = plottimes_onesec[0][-1]

# NOTE: this updates the shared temp_dewp_ax_params dictionary in place
temp_dewp_ax_params['axeslimits'] = [[tmin, tmax],
                                     [Tmin - 5.0, Tmax + 5.0]]
ax_t_td, = pm.set_meteogram_axes([ax_t_td], [temp_dewp_ax_params])

In [None]:
# Plot T/Td for the merged dataset (on its own time axis, before reindexing to the full times)

fig_t_td, ax_t_td = plt.subplots(figsize=(10, 6))
plottimes_onesec = [onesec_merged_ds['time'].to_index().to_pydatetime()]
# Temperature and Dewpoint
# Plot range: from the dewpoint minimum up to the slow-temperature maximum (NaNs ignored)
Tmin = np.nanmin(onesec_merged_ds['dewpoint'].values)
Tmax = np.nanmax(onesec_merged_ds['slowtemp'].values)
fields_to_plot_onesec = [onesec_merged_ds['slowtemp'].values, onesec_merged_ds['dewpoint'].values]
# Copy the module-level defaults so that setting 'plotmin' does not alter them
temp_params = pm.temp_params.copy()
dewpoint_params = pm.dewpoint_params.copy()
temp_params['plotmin'] = Tmin - 5.0
dewpoint_params['plotmin'] = Tmin - 5.0
field_parameters_onesec = [temp_params, dewpoint_params]
ax_t_td = pm.plotmeteogram(
    ax_t_td,
    plottimes_onesec,
    fields_to_plot_onesec,
    field_parameters_onesec)

# Time limits default to the full span; use the commented values to zoom in on a period
# tmin = datetime(2023, 3, 3, 8, 40, 0)
# tmax = datetime(2023, 3, 3, 8, 50, 0)
tmin = plottimes_onesec[0][0]
tmax = plottimes_onesec[0][-1]

# NOTE: this updates the shared temp_dewp_ax_params dictionary in place
temp_dewp_ax_params['axeslimits'] = [[tmin, tmax],
                                     [Tmin - 5.0, Tmax + 5.0]]
ax_t_td, = pm.set_meteogram_axes([ax_t_td], [temp_dewp_ax_params])

In [None]:
# Plot T/Td for the merged dataset (with full times), after the thermodynamic recomputation

fig_t_td, ax_t_td = plt.subplots(figsize=(10, 6))
plottimes_onesec = [onesec_merged_full_ds['time'].to_index().to_pydatetime()]
# Temperature and Dewpoint
# Plot range: from the dewpoint minimum up to the slow-temperature maximum (NaNs ignored)
Tmin = np.nanmin(onesec_merged_full_ds['dewpoint'].values)
Tmax = np.nanmax(onesec_merged_full_ds['slowtemp'].values)
fields_to_plot_onesec = [onesec_merged_full_ds['slowtemp'].values, onesec_merged_full_ds['dewpoint'].values]
# Copy the module-level defaults so that setting 'plotmin' does not alter them
temp_params = pm.temp_params.copy()
dewpoint_params = pm.dewpoint_params.copy()
temp_params['plotmin'] = Tmin - 5.0
dewpoint_params['plotmin'] = Tmin - 5.0
field_parameters_onesec = [temp_params, dewpoint_params]
ax_t_td = pm.plotmeteogram(
    ax_t_td,
    plottimes_onesec,
    fields_to_plot_onesec,
    field_parameters_onesec)

# Time limits default to the full span; use the commented values to zoom in on a period
# tmin = datetime(2023, 3, 3, 8, 40, 0)
# tmax = datetime(2023, 3, 3, 8, 50, 0)
tmin = plottimes_onesec[0][0]
tmax = plottimes_onesec[0][-1]

# NOTE: this updates the shared temp_dewp_ax_params dictionary in place
temp_dewp_ax_params['axeslimits'] = [[tmin, tmax],
                                     [Tmin - 5.0, Tmax + 5.0]]
ax_t_td, = pm.set_meteogram_axes([ax_t_td], [temp_dewp_ax_params])

In [None]:
# Now, figure out what times are missing from the data for the ten-sec data.
# First find the earliest start and latest end times across the card dataset and the three
# real-time datasets (spectrum, telegram, ND)
tensec_card_starttime = parsivel_combined_card_ds['time'][0].values
tensec_card_endtime = parsivel_combined_card_ds['time'][-1].values
spectrum_rt_starttime = spectrum_rt_ds['time'][0].values
spectrum_rt_endtime = spectrum_rt_ds['time'][-1].values
telegram_rt_starttime = telegram_rt_ds['time'][0].values
telegram_rt_endtime = telegram_rt_ds['time'][-1].values
ND_rt_starttime = ND_rt_ds['time'][0].values
ND_rt_endtime = ND_rt_ds['time'][-1].values

starttime_tensec = min(tensec_card_starttime, spectrum_rt_starttime, telegram_rt_starttime, ND_rt_starttime)
endtime_tensec = max(tensec_card_endtime, spectrum_rt_endtime, telegram_rt_endtime, ND_rt_endtime)

print(starttime_tensec, endtime_tensec)

# Then, create a new index of all times at 10-s intervals between the two.
# NOTE(review): the grid is anchored at starttime_tensec; the membership tests and reindex
# calls below match time labels exactly, so a source whose stamps do not fall on this grid
# would show up as entirely missing -- confirm with the printed times below.

all_tensec_times = xr.date_range(starttime_tensec, endtime_tensec, freq='10S')
print(all_tensec_times)
print(all_tensec_times[0], all_tensec_times[-1])

# Print the source time coordinates for comparison with the grid
print(ND_rt_ds['time'])
print(spectrum_rt_ds['time'])
print(parsivel_combined_card_ds['time'])

In [None]:
# Flag every expected 10-s time that is absent from each source (True = missing).
# Index.isin performs the exact-label membership test in one vectorized call instead of
# looping over every time stamp in Python; each result is a boolean NumPy array.
missing_times_spectrum_rt = ~all_tensec_times.isin(spectrum_rt_ds.indexes['time'])

missing_times_telegram_rt = ~all_tensec_times.isin(telegram_rt_ds.indexes['time'])

missing_times_ND_rt = ~all_tensec_times.isin(ND_rt_ds.indexes['time'])

missing_times_tensec_card = ~all_tensec_times.isin(parsivel_combined_card_ds.indexes['time'])

# Times missing from both the real-time spectrum data and the card data
missing_times_tensec_both = missing_times_spectrum_rt & missing_times_tensec_card

In [None]:
# Sanity check: True if the real-time spectrum and ND data are missing exactly the same times
all(missing_times_spectrum_rt == missing_times_ND_rt)

In [None]:
# Show where each ten-second source has gaps (1 = time missing from that source).
# The real-time curve is based on the spectrum data.
fig, ax = plt.subplots()

ax.plot(all_tensec_times, missing_times_spectrum_rt, label='Real time data')
ax.plot(all_tensec_times, missing_times_tensec_card, label='Card data')
# Label the figure so it can be read on its own
ax.set(xlabel='Time (UTC)', ylabel='Missing (1 = yes)',
       title='Missing ten-second times by source')
# Trailing semicolon keeps the Legend repr out of the cell output
ax.legend(loc='best');

In [None]:
# Show the ten-second times that are missing from both sources (1 = missing from both)
fig, ax = plt.subplots()

ax.plot(all_tensec_times, missing_times_tensec_both, label='Both datasets')
# Label the figure so it can be read on its own
ax.set(xlabel='Time (UTC)', ylabel='Missing (1 = yes)',
       title='Ten-second times missing from both sources')
# Trailing semicolon keeps the Legend repr out of the cell output
ax.legend(loc='best');

In [None]:
# Now reindex all ten-second datasets onto the full set of 10-s times. Times that a source
# does not have are filled with missing values.

ND_rt_ds_full = ND_rt_ds.reindex({'time': all_tensec_times})
spectrum_rt_ds_full = spectrum_rt_ds.reindex({'time': all_tensec_times})
telegram_rt_ds_full = telegram_rt_ds.reindex({'time': all_tensec_times})
parsivel_combined_card_ds_full = parsivel_combined_card_ds.reindex({'time': all_tensec_times})

In [None]:
# Attempt to merge the data to fill in gaps. This cell only pulls out matching pairs of
# variables (real-time and card); the merge itself happens in the next cell.

# Number concentration
ND_rt_da = ND_rt_ds['ND']
ND_card_da = parsivel_combined_card_ds['ND']

# Telegram variables from the real-time data (stored under descriptive names)
precipintensity_rt_da = telegram_rt_ds['rain rate (mm per hr)']
precipaccum_rt_da = telegram_rt_ds['rain accumulation (mm)']
parsivel_dBZ_rt_da = telegram_rt_ds['radar reflectivity (dBZ)']
sample_interval_rt_da = telegram_rt_ds['sample interval']
signal_amplitude_rt_da = telegram_rt_ds['signal amplitude']
pcount_rt_da = telegram_rt_ds['particle count']
sensor_temp_rt_da = telegram_rt_ds['sensor temp']
pvoltage_rt_da = telegram_rt_ds['power supply voltage']

# The corresponding variables in the card-derived combined dataset (short names)
precipintensity_card_da = parsivel_combined_card_ds['precipintensity']
precipaccum_card_da = parsivel_combined_card_ds['precipaccum']
parsivel_dBZ_card_da = parsivel_combined_card_ds['parsivel_dBZ']
sample_interval_card_da = parsivel_combined_card_ds['sample_interval']
signal_amplitude_card_da = parsivel_combined_card_ds['signal_amplitude']
pcount_card_da = parsivel_combined_card_ds['pcount']
sensor_temp_card_da = parsivel_combined_card_ds['sensor_temp']
pvoltage_card_da = parsivel_combined_card_ds['pvoltage']

# Spectrum: 'spectrum' in the real-time data is paired with 'VD_matrix' in the card data
spectrum_rt_da = spectrum_rt_ds['spectrum']
spectrum_card_da = parsivel_combined_card_ds['VD_matrix']

In [None]:
# Merge each real-time/card pair: combine_first gives priority to the real-time values and
# falls back to the card values where the real-time data are missing
ND_da_merged = ND_rt_da.combine_first(ND_card_da)
precipintensity_merged_da = precipintensity_rt_da.combine_first(precipintensity_card_da)
precipaccum_merged_da = precipaccum_rt_da.combine_first(precipaccum_card_da)
parsivel_dBZ_merged_da = parsivel_dBZ_rt_da.combine_first(parsivel_dBZ_card_da)
sample_interval_merged_da = sample_interval_rt_da.combine_first(sample_interval_card_da)
signal_amplitude_merged_da = signal_amplitude_rt_da.combine_first(signal_amplitude_card_da)
pcount_merged_da = pcount_rt_da.combine_first(pcount_card_da)
sensor_temp_merged_da = sensor_temp_rt_da.combine_first(sensor_temp_card_da)
pvoltage_merged_da = pvoltage_rt_da.combine_first(pvoltage_card_da)
spectrum_merged_da = spectrum_rt_da.combine_first(spectrum_card_da)

In [None]:
# Reindex every merged variable onto the full set of 10-s times, so that times missing from
# both sources appear as missing values
ND_da_merged_full = ND_da_merged.reindex({'time': all_tensec_times})
precipintensity_merged_da_full = precipintensity_merged_da.reindex({'time': all_tensec_times})
precipaccum_merged_da_full = precipaccum_merged_da.reindex({'time': all_tensec_times})
parsivel_dBZ_merged_da_full = parsivel_dBZ_merged_da.reindex({'time': all_tensec_times})
sample_interval_merged_da_full = sample_interval_merged_da.reindex({'time': all_tensec_times})
signal_amplitude_merged_da_full = signal_amplitude_merged_da.reindex({'time': all_tensec_times})
pcount_merged_da_full = pcount_merged_da.reindex({'time': all_tensec_times})
sensor_temp_merged_da_full = sensor_temp_merged_da.reindex({'time': all_tensec_times})
pvoltage_merged_da_full = pvoltage_merged_da.reindex({'time': all_tensec_times})
spectrum_merged_da_full = spectrum_merged_da.reindex({'time': all_tensec_times})

In [None]:
# Now that we have merged all the data that we can, start putting it back together in
# preparation for dumping back to disk.

# Start from a copy of the reindexed card dataset and overwrite each variable with its
# merged counterpart (using the card dataset's variable names)
parsivel_combined_merged_full_ds = parsivel_combined_card_ds_full.copy()
parsivel_combined_merged_full_ds['ND'] = ND_da_merged_full
parsivel_combined_merged_full_ds['precipintensity'] = precipintensity_merged_da_full
parsivel_combined_merged_full_ds['precipaccum'] = precipaccum_merged_da_full
parsivel_combined_merged_full_ds['parsivel_dBZ'] = parsivel_dBZ_merged_da_full
parsivel_combined_merged_full_ds['sample_interval'] = sample_interval_merged_da_full
parsivel_combined_merged_full_ds['signal_amplitude'] = signal_amplitude_merged_da_full
parsivel_combined_merged_full_ds['pcount'] = pcount_merged_da_full
parsivel_combined_merged_full_ds['sensor_temp'] = sensor_temp_merged_da_full
parsivel_combined_merged_full_ds['pvoltage'] = pvoltage_merged_da_full
parsivel_combined_merged_full_ds['VD_matrix'] = spectrum_merged_da_full

In [None]:
# Now resample the PIPS onesec data to 10-sec intervals and replace the existing 10-sec resampled data
# in the parsivel combined dataset

# Resample conventional data to the parsivel times.
# The seconds value of the first PSD time is passed to the resampler as the offset.
PSD_datetimes = pips.get_PSD_datetimes(parsivel_combined_merged_full_ds['VD_matrix'])
sec_offset = PSD_datetimes[0].second
# NOTE(review): offset_str is not used anywhere else in the visible notebook
offset_str = pips.get_interval_str(sec_offset)
resample_interval = 10
conv_resampled_ds = pips.resample_conv_da('PIPS', resample_interval, sec_offset, onesec_merged_full_ds,
                                          gusts=True, gustintvstr='3S')

In [None]:
# Display the resampled conventional dataset for inspection
conv_resampled_ds

In [None]:
# Replace all original resampled conventional data variables in the merged parsivel combined
# dataset with the new ones. Variables in conv_resampled_ds that were not already present
# are added.

for varname in conv_resampled_ds:
    parsivel_combined_merged_full_ds[varname] = conv_resampled_ds[varname]

In [None]:
# Quick DSD meteogram for merged data
fig, ax = plt.subplots(figsize=(10, 6))
plottimes_tmp = ND_da_merged_full.coords['time'].to_index().to_pydatetime()
# Prepend an additional time (10 s before the first) at the beginning of the array so that
# pcolor sees this as the edges of the DSD intervals.
plottimes = np.insert(plottimes_tmp, 0, plottimes_tmp[0] - timedelta(seconds=10))
plottimes = [plottimes]

# Transpose so that time is the last axis, then take log10; np.ma.log10 masks values for
# which the logarithm is undefined instead of producing -inf/NaN
ND_arr = ND_da_merged_full.values.T
logND_arr = np.ma.log10(ND_arr)
fields_to_plot = [logND_arr]
field_parameters = [log_ND_params]
ax = pm.plotmeteogram(ax, plottimes, fields_to_plot, field_parameters,
                      yvals=[diameter_edges] * len(fields_to_plot))
ax, = pm.set_meteogram_axes([ax], [log_ND_ax_params])

In [None]:
# Quick DSD meteogram for real-time data (reindexed onto the full 10-s times)
fig, ax = plt.subplots(figsize=(10, 6))
plottimes_tmp = ND_rt_ds_full['time'].to_index().to_pydatetime()
# Prepend an additional time (10 s before the first) at the beginning of the array so that
# pcolor sees this as the edges of the DSD intervals.
plottimes = np.insert(plottimes_tmp, 0, plottimes_tmp[0] - timedelta(seconds=10))
plottimes = [plottimes]

# NOTE: this rebinds ND_rt_da (previously the un-reindexed real-time ND) to the reindexed version
ND_rt_da = ND_rt_ds_full['ND']

# Transpose so that time is the last axis, then take log10; np.ma.log10 masks values for
# which the logarithm is undefined instead of producing -inf/NaN
ND_rt_arr = ND_rt_da.values.T
logND_rt_arr = np.ma.log10(ND_rt_arr)
fields_to_plot = [logND_rt_arr]
field_parameters = [log_ND_params]
ax = pm.plotmeteogram(ax, plottimes, fields_to_plot, field_parameters,
                      yvals=[diameter_edges] * len(fields_to_plot))
ax, = pm.set_meteogram_axes([ax], [log_ND_ax_params])

In [None]:
# Quick DSD meteogram for card data (reindexed onto the full 10-s times)
fig, ax = plt.subplots(figsize=(10, 6))
plottimes_tmp = parsivel_combined_card_ds_full['time'].to_index().to_pydatetime()
# Prepend an additional time (10 s before the first) at the beginning of the array so that
# pcolor sees this as the edges of the DSD intervals.
plottimes = np.insert(plottimes_tmp, 0, plottimes_tmp[0] - timedelta(seconds=10))
plottimes = [plottimes]

# NOTE: this rebinds ND_card_da (previously the un-reindexed card ND) to the reindexed version
ND_card_da = parsivel_combined_card_ds_full['ND']

# Transpose so that time is the last axis, then take log10; np.ma.log10 masks values for
# which the logarithm is undefined instead of producing -inf/NaN
ND_card_arr = ND_card_da.values.T
logND_card_arr = np.ma.log10(ND_card_arr)
fields_to_plot = [logND_card_arr]
field_parameters = [log_ND_params]
ax = pm.plotmeteogram(ax, plottimes, fields_to_plot, field_parameters,
                      yvals=[diameter_edges] * len(fields_to_plot))
ax, = pm.set_meteogram_axes([ax], [log_ND_ax_params])

In [None]:
# Test plot of the resampled data to make sure it looks ok:
# T/Td from the merged parsivel combined dataset (10-s, full times)

fig_t_td, ax_t_td = plt.subplots(figsize=(10, 6))
plottimes_tensec = [parsivel_combined_merged_full_ds['time'].to_index().to_pydatetime()]
# Temperature and Dewpoint
# Plot range: from the dewpoint minimum up to the slow-temperature maximum (NaNs ignored)
Tmin = np.nanmin(parsivel_combined_merged_full_ds['dewpoint'].values)
Tmax = np.nanmax(parsivel_combined_merged_full_ds['slowtemp'].values)
fields_to_plot_tensec = [parsivel_combined_merged_full_ds['slowtemp'].values,
                         parsivel_combined_merged_full_ds['dewpoint'].values]
# Copy the module-level defaults so that setting 'plotmin' does not alter them
temp_params = pm.temp_params.copy()
dewpoint_params = pm.dewpoint_params.copy()
temp_params['plotmin'] = Tmin - 5.0
dewpoint_params['plotmin'] = Tmin - 5.0
field_parameters_tensec = [temp_params, dewpoint_params]
ax_t_td = pm.plotmeteogram(
    ax_t_td,
    plottimes_tensec,
    fields_to_plot_tensec,
    field_parameters_tensec)

# Time limits default to the full span; use the commented values to zoom in on a period
# tmin = datetime(2023, 3, 3, 8, 40, 0)
# tmax = datetime(2023, 3, 3, 8, 50, 0)
tmin = plottimes_tensec[0][0]
tmax = plottimes_tensec[0][-1]

# NOTE: this updates the shared temp_dewp_ax_params dictionary in place
temp_dewp_ax_params['axeslimits'] = [[tmin, tmax],
                                     [Tmin - 5.0, Tmax + 5.0]]
ax_t_td, = pm.set_meteogram_axes([ax_t_td], [temp_dewp_ax_params])

In [None]:
# Display the original card-derived one-second dataset for inspection
onesec_card_ds

In [None]:
# Display the merged one-second dataset (full times) for inspection
onesec_merged_full_ds

In [None]:
# Display the merged parsivel combined dataset (full times) for inspection
parsivel_combined_merged_full_ds

In [None]:
# Display the original card-derived parsivel combined dataset for comparison
parsivel_combined_card_ds

In [None]:
# Almost done. Copy all attributes from the original card datasets to the new ones, since
# they sometimes get lost along the way.

# Global attributes
onesec_merged_full_ds.attrs = onesec_card_ds.attrs
parsivel_combined_merged_full_ds.attrs = parsivel_combined_card_ds.attrs

# Variable attributes. Only variables that also exist in the card dataset have attributes
# to copy; a variable that came solely from the real-time or resampled data keeps its own
# attributes instead of raising a KeyError on the card-dataset lookup.
for varname in onesec_merged_full_ds:
    if varname in onesec_card_ds:
        onesec_merged_full_ds[varname].attrs = onesec_card_ds[varname].attrs

for varname in parsivel_combined_merged_full_ds:
    if varname in parsivel_combined_card_ds:
        parsivel_combined_merged_full_ds[varname].attrs = parsivel_combined_card_ds[varname].attrs

In [None]:
# Display the merged one-second dataset again to check the copied attributes
onesec_merged_full_ds

In [None]:
# Write both merged datasets to the merged-output directory, using the same file names
# as the card-derived inputs

onesec_output_filename = f'conventional_raw_{deployment_name}_{PIPS_name}.nc'
onesec_output_path = os.path.join(card_netcdf_output_dir, onesec_output_filename)

parsivel_combined_output_filename = f'parsivel_combined_{deployment_name}_{PIPS_name}_10s.nc'
parsivel_combined_output_path = os.path.join(card_netcdf_output_dir, parsivel_combined_output_filename)

# One-second data first, then the 10-s parsivel combined data
for out_ds, out_path in ((onesec_merged_full_ds, onesec_output_path),
                         (parsivel_combined_merged_full_ds, parsivel_combined_output_path)):
    print("Saving {}".format(out_path))
    out_ds.to_netcdf(out_path)