# A notebook to perform QC on the PIPS T and RH/Td observations

In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import numpy.ma as ma
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.ticker as ticker
import matplotlib.dates as dates
from mpl_toolkits.axes_grid1 import ImageGrid,make_axes_locatable,host_subplot
#from mpl_toolkits.basemap import Basemap
from datetime import datetime, timedelta
import sys
import os
import pyPIPS.utils as utils
import pyPIPS.thermolib as thermo
import pyPIPS.DSDlib as dsd
#import pyPIPS.disdrometer_module as dis
import pyPIPS.plotmodule as PIPSplot
#import pyPIPS.simulator as sim
import pyPIPS.pips_io as pipsio
import pyPIPS.PIPS as pips
import pyPIPS.parsivel_params as pp
import pyPIPS.parsivel_qc as pqc
import pyPIPS.polarimetric as dualpol
#from pyCRMtools.modules import plotmodule as plotmod
from pyCRMtools.modules import utils as CRMutils
# from pyCRMtools.pycaps import arps_read
# from pyCRMtools.pycaps import pycaps_fields
# from pyCRMtools.pycaps import calvars_radar as radar
import pandas as pd
import xarray as xr
import glob
import numpy.random as random
from scipy.stats import gamma, uniform
from scipy.stats.mstats import zscore
from scipy.special import gamma as gammafunc
from scipy import ndimage
from metpy.plots import StationPlot
from metpy.calc import wind_components
from metpy.cbook import get_test_data
from metpy.plots import StationPlot
from metpy.plots.wx_symbols import current_weather, sky_cover
from metpy.units import units
from cycler import cycler
import warnings
warnings.simplefilter('ignore')
%matplotlib notebook

In [None]:
# plt.style.use('seaborn-v0_8-bright')

In [None]:
# Locate the original PIPS netCDF files and make sure the output directory exists.
# The commented-out assignments are alternative configurations; keep exactly one of each active.

# PIPS_input_base_dir = '/Users/dawson29/Projects/PERiLS/obsdata/2022/PIPS_data/'
# PIPS_output_base_dir = '/Users/dawson29/Projects/PERiLS/obsdata/2022/PIPS_data_for_EOL/'

PIPS_base_dir = '/Users/dawson29/Dropbox/Projects/PERiLS/obsdata/2023/'
# PIPS_base_dir = '/Users/dawson29/Dropbox/Projects/PERiLS/obsdata/2023/'
# PIPS_base_dir = '/Users/dawson29/Projects/PERiLS/obsdata/2023/PIPS_data/'
# PIPS_base_dir = '/Users/dawson29/Projects/PERiLS/obsdata/2022/PIPS_data/'

# deployment_name = 'IOP2_030323' # '031623_mass_test' # 'IOP2_030323' # 'IOP5_040523' # 'IOP4_033123' # 'IOP3_032423' # 'IOP3_040522'
# PIPS_input_dir = os.path.join(PIPS_base_dir, deployment_name, 'netcdf_v110823')
# PIPS_output_dir = os.path.join(PIPS_base_dir, deployment_name, 'netcdf_T_Td_QC_v110823')

deployment_name = '022723_mass_test'
PIPS_input_dir = os.path.join(PIPS_base_dir, deployment_name, 'netcdf')
PIPS_output_dir = os.path.join(PIPS_base_dir, deployment_name, 'netcdf_thermoQC')

# deployment_name = 'IOP2_033022' # '031623_mass_test' # 'IOP2_030323' # 'IOP5_040523' # 'IOP4_033123' # 'IOP3_032423' # 'IOP3_040522'
# PIPS_input_dir = os.path.join(PIPS_base_dir, deployment_name, 'netcdf')
# PIPS_output_dir = os.path.join(PIPS_base_dir, deployment_name, 'netcdf_T_Td_QC')

# exist_ok=True replaces the separate existence check: it is a no-op when the directory is
# already there and raises if the path exists but is not a directory (which the old check
# would have silently accepted).
os.makedirs(PIPS_output_dir, exist_ok=True)

# Choose which PIPS probes to process. Exactly one PIPS_names assignment should be active;
# the others are kept as ready-made alternatives for the different deployments.
# IOP1 2022
# PIPS_names = ['PIPS1A', 'PIPS1B', 'PIPS2A', 'PIPS2B']
# IOP2 2022
# PIPS_names = ['PIPS1A', 'PIPS1B', 'PIPS2A', 'PIPS3B']
# IOP3 2022
PIPS_names = ['PIPS1A', 'PIPS1B', 'PIPS2A', 'PIPS2B', 'PIPS3A', 'PIPS3B']
# IOP3 2023
# PIPS_names = ['PIPS2A', 'PIPS3A']
# IOP2, IOP4 or IOP5 2023
# PIPS_names = ['PIPS1A', 'PIPS1B', 'PIPS2A', 'PIPS2B', 'PIPS3A', 'PIPS3B']

# Parsivel interval; it appears as "<N>s" in the parsivel file names built later in this notebook
parsivel_interval = 10
# Frequency string used when resampling onto the parsivel times. Derived from parsivel_interval
# so the two settings cannot drift apart (evaluates to '10S' for the value above).
intervalstr = f'{parsivel_interval:d}S'

# Build the input and output file paths for every PIPS, then load the datasets into memory.
parsivel_filenames = ['parsivel_combined_{}_{}_{:d}s.nc'.format(deployment_name, PIPS_name, parsivel_interval)
                      for PIPS_name in PIPS_names]
parsivel_filepaths = [os.path.join(PIPS_input_dir, parsivel_filename) for parsivel_filename in parsivel_filenames]
output_parsivel_filepaths = [os.path.join(PIPS_output_dir, parsivel_filename)
                             for parsivel_filename in parsivel_filenames]
conv_filenames = ['conventional_raw_{}_{}.nc'.format(deployment_name, PIPS_name) for PIPS_name in PIPS_names]
conv_filepaths = [os.path.join(PIPS_input_dir, conv_filename) for conv_filename in conv_filenames]
output_conv_filepaths = [os.path.join(PIPS_output_dir, conv_filename) for conv_filename in conv_filenames]

parsivel_ds_dict = {}
conv_ds_dict = {}
for PIPS_name, parsivel_filepath, conv_filepath in zip(PIPS_names, parsivel_filepaths, conv_filepaths):
    # The Parsivel file is optional: a failed load is recorded as None so the rest of the
    # notebook can still QC the conventional data. Catch Exception (not a bare except) so that
    # KeyboardInterrupt/SystemExit still propagate, and report why the load failed.
    try:
        parsivel_ds_dict[PIPS_name] = xr.load_dataset(parsivel_filepath)
    except Exception as err:
        print(f'WARNING: could not load {parsivel_filepath} ({err!r}); '
              f'no Parsivel data will be available for {PIPS_name}')
        parsivel_ds_dict[PIPS_name] = None
    # The conventional file is required; let any error here stop the cell
    conv_ds_dict[PIPS_name] = xr.load_dataset(conv_filepath)

In [None]:
# default_cycler = (cycler(color=['r', 'orange', 'y', 'g', 'b', 'purple']))

# plt.rc('lines', linewidth=4)
# plt.rc('axes', prop_cycle=default_cycler)

In [None]:
# Work out the earliest start time and the latest stop time among the conventional
# datasets of the PIPS being compared
PIPS_to_comp = PIPS_names

start_times = []
stop_times = []
for PIPS_name in PIPS_to_comp:
    conv_times = conv_ds_dict[PIPS_name]['time']
    start_times.append(conv_times[0])
    stop_times.append(conv_times[-1])

min_time, max_time = min(start_times), max(stop_times)

print(min_time, max_time)

In [None]:
# Time window used by the time-series plots below. Defaults to the full span found above;
# the trailing comments show the string form that can be substituted to zoom in on a sub-period.
time_start = min_time # '2023-03-17T8:30:00'
time_stop = max_time # '2023-03-17T9:30:00'

In [None]:
# Let's look at a comparison of the fast temps from all the PIPS for this mass test

fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    conv_ds['fasttemp'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_fastT',
                                                                    ls='None', marker='o', ms=1., alpha=0.5)

# Build the legend once, after every series has been added, instead of on each iteration
ax.legend(loc='best')

In [None]:
# The fast temps all sit close together with no obvious outlier, so a plain ensemble mean
# over the PIPS at each time is a reasonable reference. Each probe's departure from that
# mean is what gets used further down to correct it.

all_PIPS_fastT_list = [conv_ds_dict[name]['fasttemp'] for name in PIPS_to_comp]

# Stack the individual series along a new 'PIPS' dimension, then average across it
all_PIPS_fastT_ds = xr.concat(all_PIPS_fastT_list, dim='PIPS')
avg_PIPS_fastT_ds = all_PIPS_fastT_ds.mean(dim='PIPS', skipna=True)


In [None]:
# Display the stacked (PIPS x time) fast-temperature array for inspection
all_PIPS_fastT_ds

In [None]:
# Display the PIPS-averaged fast-temperature array for inspection
avg_PIPS_fastT_ds

In [None]:
# Time series of the PIPS-averaged fast temp

fig, ax = plt.subplots()

avg_fastT_window = avg_PIPS_fastT_ds.sel(time=slice(time_start, time_stop))
avg_fastT_window.plot(ax=ax, label='PIPS_avg_fastT', ls='None', marker='o', ms=1., alpha=0.5)
ax.legend(loc='best')

In [None]:
# Next: the departure of each PIPS from the ensemble mean at every time. The time average of
# that departure (taken in a later cell) serves as a constant per-probe offset. This assumes the
# bias is constant and independent of temperature etc., which is imperfect but good enough for
# most purposes.

# Departure of each probe from the ensemble mean, keyed by PIPS name
diff_T_dict = {}

for PIPS_name in PIPS_to_comp:
    diff_T_dict[PIPS_name] = conv_ds_dict[PIPS_name]['fasttemp'] - avg_PIPS_fastT_ds

In [None]:
# Time series of the departures. They are mostly small and within 0.2 deg of each other
fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    diff_T_dict[PIPS_name].sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_diff_fastT', **point_style)

ax.legend()

In [None]:
# Take the time mean of each probe's departure, remove it from the original fasttemp series,
# and keep the result in a new "corrected" fasttemp variable
for PIPS_name in PIPS_to_comp:
    ds = conv_ds_dict[PIPS_name]
    fastT_bias = diff_T_dict[PIPS_name].mean().values
    print(PIPS_name, fastT_bias)
    ds['fasttemp_corrected'] = ds['fasttemp'].copy() - fastT_bias
    # Record the removed offset as an attribute so the modification is traceable
    ds['fasttemp_corrected'].attrs['bias_subtracted'] = fastT_bias

In [None]:
# Time series of the bias-corrected fasttemps

fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    fastT_corrected = conv_ds_dict[PIPS_name]['fasttemp_corrected']
    fastT_corrected.sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_fastT_corrected', **point_style)

# Optional overlay of the uncorrected fast temp for the first PIPS (disabled):
# plt.gca().set_prop_cycle(None)

# for PIPS_name in PIPS_to_comp[:1]:
#     conv_ds = conv_ds_dict[PIPS_name]
#     conv_ds['fasttemp'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_fastT', 
#                                                                     ls='None', marker='x', ms=5., alpha=0.75)

ax.legend(loc='best')

In [None]:
# Ok, now that we've handled the fasttemps, let's move on to the slowtemps
fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    conv_ds['slowtemp'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_slowT',
                                                                    ls='None', marker='o', ms=1., alpha=0.5)

# Build the legend once, after every series has been added, instead of on each iteration
ax.legend(loc='best')

In [None]:
# Here PIPS3A and PIPS3B clearly cluster together, reading about a degree higher than the
# others, which all cluster around a lower temp (although PIPS1B seems to have a systematic
# cold bias of about 0.25 deg). To dig deeper, compare the fast and slow temps for each PIPS.
# That gives some idea of which slowtemps are closer to being correct, and hence how best to
# correct them.

# One figure per PIPS (to avoid clutter) showing the slow temp and the corrected fast temp together

time_window = slice(time_start, time_stop)

for PIPS_name in PIPS_to_comp:
    ds = conv_ds_dict[PIPS_name]
    fig, ax = plt.subplots()
    ds['slowtemp'].sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_slowT',
                                              ls='None', marker='o', ms=1., alpha=0.5)
    ds['fasttemp_corrected'].sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_fastT_corrected',
                                                        ls='None', marker='x', ms=1., alpha=0.5)

    # Each PIPS has its own figure, so each one needs its own legend
    ax.legend(loc='best')

In [None]:
# The plots above strongly suggest that the slow-temp bias of each PIPS is most easily removed
# with a constant offset equal to the mean slow-minus-fast difference for that PIPS. In other
# words, the corrected fast temps are likely the closest to being accurate. To know for sure we
# would want a well-calibrated reference temperature to compare everything to, but we don't
# have one here. So we assume the fast temps are best and correct the slow temps against them.
# Again, this isn't perfect because the offset may not be constant, but it should be good enough.

# Compute the slow-minus-(corrected)-fast difference for each PIPS. The time average of these
# is taken in a later cell and removed from the original slow temps to make a new "corrected"
# slow temp variable.

diff_T_dict2 = {}

for PIPS_name in PIPS_to_comp:
    ds = conv_ds_dict[PIPS_name]
    diff_T_dict2[PIPS_name] = ds['slowtemp'] - ds['fasttemp_corrected']

In [None]:
# Time series of the slow-minus-fast temp differences.
# There are a couple of "spikes" where the temperature dropped rapidly. They reflect the
# different time constants of the fast- and slow-response temperature sensors, but those periods
# are so short that a simple time average of all the diffs still works. To get fancy we could
# exclude any period where the temperature is changing too much from the average, but it
# probably wouldn't make much of a difference in this case.

fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    diff_T_dict2[PIPS_name].sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_diff_T', **point_style)

ax.legend()

In [None]:
# Take the time mean of each slow-minus-fast difference, remove it from the original slowtemp,
# and keep the result in a new corrected slowtemp variable

for PIPS_name in PIPS_to_comp:
    ds = conv_ds_dict[PIPS_name]
    slowT_bias = diff_T_dict2[PIPS_name].mean().values
    print(PIPS_name, slowT_bias)
    ds['slowtemp_corrected'] = ds['slowtemp'].copy() - slowT_bias
    # Record the removed offset as an attribute so the modification is traceable
    ds['slowtemp_corrected'].attrs['bias_subtracted'] = slowT_bias
    

In [None]:
# Time series of the bias-corrected slowtemps
# Much better!

fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    slowT_corrected = conv_ds_dict[PIPS_name]['slowtemp_corrected']
    slowT_corrected.sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_slowT_corrected', **point_style)

# Optional overlay of the uncorrected fast temp for the first PIPS, and the legend (both disabled):
# plt.gca().set_prop_cycle(None)

# for PIPS_name in PIPS_to_comp[:1]:
#     conv_ds = conv_ds_dict[PIPS_name]
#     conv_ds['fasttemp'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_fastT', 
#                                                                     ls='None', marker='x', ms=5., alpha=0.75)

# ax.legend()

In [None]:
# Now let's do the same for the RH
fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    conv_ds['RH'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_RH',
                                                              ls='None', marker='o', ms=1., alpha=0.5)

# Build the legend once, after every series has been added, instead of on each iteration
ax.legend(loc='best')

In [None]:
# Again PIPS 3A and 3B form a cluster that reads a bit too high while the others cluster lower.
# Lacking an independent comparison, assume PIPS 3A and 3B are the ones that are "wrong" and
# build the reference from the average of all the others (excluding PIPS3A and PIPS3B); that
# reference is then used to correct everything.

PIPS_to_avg = ['PIPS1A', 'PIPS1B', 'PIPS2A', 'PIPS2B']
PIPS_RH_list = [conv_ds_dict[name]['RH'] for name in PIPS_to_avg]

# Stack the trusted probes along a new 'PIPS' dimension, then average across it
PIPS_RH_ds = xr.concat(PIPS_RH_list, dim='PIPS')
avg_PIPS_RH_ds = PIPS_RH_ds.mean(dim='PIPS', skipna=True)

In [None]:
# Time series of the PIPS-averaged RH

fig, ax = plt.subplots()

avg_RH_window = avg_PIPS_RH_ds.sel(time=slice(time_start, time_stop))
avg_RH_window.plot(ax=ax, label='PIPS_avg_RH', ls='None', marker='o', ms=1., alpha=0.5)
ax.legend(loc='best')

In [None]:
# Next: the departure of each PIPS RH from the reference mean at every time. The time average
# of that departure (taken in a later cell) serves as a constant per-probe offset. This assumes
# the bias is constant and independent of temperature etc., which is imperfect but good enough
# for most purposes.

# Departure of each probe from the reference mean, keyed by PIPS name
diff_RH_dict = {}

for PIPS_name in PIPS_to_comp:
    diff_RH_dict[PIPS_name] = conv_ds_dict[PIPS_name]['RH'] - avg_PIPS_RH_ds

In [None]:
# Time series of the RH departures
fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    diff_RH_dict[PIPS_name].sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_diff_RH', **point_style)

ax.legend()

In [None]:
# Take the time mean of each probe's RH departure, remove it from the original RH series,
# and keep the result in a new "corrected" RH variable
for PIPS_name in PIPS_to_comp:
    ds = conv_ds_dict[PIPS_name]
    RH_bias = diff_RH_dict[PIPS_name].mean().values
    print(PIPS_name, RH_bias)
    ds['RH_corrected'] = ds['RH'].copy() - RH_bias
    # Record the removed offset as an attribute so the modification is traceable
    ds['RH_corrected'].attrs['bias_subtracted'] = RH_bias

In [None]:
# Time series of the bias-corrected RH
# Much better! Still some residual differences in spots but we can (probably) live with them

fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    RH_corrected = conv_ds_dict[PIPS_name]['RH_corrected']
    RH_corrected.sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_RH_corrected', **point_style)

ax.legend(loc='best')

In [None]:
# Next we need to check the pressure and correct any biases. Then we need to
# recompute the dewpoint and the "derived" RH using the bias-corrected temperatures, RH, and pressure
# After that, we resample everything to 10-s intervals for the parsivel_ds for each PIPS and save the updated
# netCDF files back to disk!

In [None]:
# Do the same for pressure.

fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    conv_ds['pressure'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_pressure',
                                                                    ls='None', marker='o', ms=1., alpha=0.5)

# Build the legend once, after every series has been added, instead of on each iteration
ax.legend(loc='best')

In [None]:
# The probes all agree very well with each other on pressure, so, as with the fasttemps, a
# plain ensemble mean over the PIPS at each time is a reasonable reference. Each probe's
# departure from that mean is what gets used further down to correct it.

all_PIPS_pressure_list = [conv_ds_dict[name]['pressure'] for name in PIPS_to_comp]

# Stack the individual series along a new 'PIPS' dimension, then average across it
all_PIPS_pressure_ds = xr.concat(all_PIPS_pressure_list, dim='PIPS')
avg_PIPS_pressure_ds = all_PIPS_pressure_ds.mean(dim='PIPS', skipna=True)

In [None]:
# Time series of the PIPS-averaged pressure

fig, ax = plt.subplots()

avg_pressure_window = avg_PIPS_pressure_ds.sel(time=slice(time_start, time_stop))
avg_pressure_window.plot(ax=ax, label='PIPS_avg_pressure', ls='None', marker='o', ms=1., alpha=0.5)
ax.legend(loc='best')

In [None]:
# Next: the departure of each PIPS pressure from the ensemble mean at every time. The time
# average of that departure (taken in a later cell) serves as a constant per-probe offset. This
# assumes the bias is constant and independent of pressure etc., which is imperfect but good
# enough for most purposes.

# Departure of each probe from the ensemble mean, keyed by PIPS name
diff_p_dict = {}

for PIPS_name in PIPS_to_comp:
    diff_p_dict[PIPS_name] = conv_ds_dict[PIPS_name]['pressure'] - avg_PIPS_pressure_ds

In [None]:
# Time series of the pressure departures. They are mostly small and within 0.2 of each other.
# NOTE(review): the original note gave the unit as "Pa", but this notebook multiplies pressure
# by 100 before handing it to the thermo routines, so the stored unit is presumably hPa — confirm.
fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    diff_p_dict[PIPS_name].sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_diff_pressure', **point_style)

ax.legend()

In [None]:
# Take the time mean of each probe's pressure departure, remove it from the original pressure
# series, and keep the result in a new "corrected" pressure variable
for PIPS_name in PIPS_to_comp:
    ds = conv_ds_dict[PIPS_name]
    pressure_bias = diff_p_dict[PIPS_name].mean().values
    print(PIPS_name, pressure_bias)
    ds['pressure_corrected'] = ds['pressure'].copy() - pressure_bias
    # Record the removed offset as an attribute so the modification is traceable
    ds['pressure_corrected'].attrs['bias_subtracted'] = pressure_bias

In [None]:
# Time series of the bias-corrected pressure

fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    p_corrected = conv_ds_dict[PIPS_name]['pressure_corrected']
    p_corrected.sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_pressure_corrected', **point_style)

ax.legend(loc='best')

In [None]:
# Recompute the dewpoint and RH_derived from the bias-corrected values obtained above

for PIPS_name in PIPS_to_comp:
    ds = conv_ds_dict[PIPS_name]

    # Inputs in the form passed to the thermo routines (presumably hPa -> Pa, deg C -> K and
    # percent -> fraction — confirm against pyPIPS.thermolib)
    p_in = ds['pressure_corrected'] * 100.
    slowT_in = ds['slowtemp_corrected'] + 273.15
    fastT_in = ds['fasttemp_corrected'] + 273.15
    RH_in = ds['RH_corrected'] / 100.

    # Dewpoint comes from the slow temp and the measured RH ...
    dewpoint = thermo.calTdfromRH(p_in, slowT_in, RH_in) - 273.15
#     dewpoint.sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_dewpoint', 
#                                                          ls='None', marker='o', ms=1., alpha=0.5)
    # ... and the derived RH from the fast temp and that dewpoint
    RH_derived = thermo.calRH(p_in, fastT_in, dewpoint + 273.15) * 100.

    # Start from copies of the original variables, then overwrite only the data with the
    # recomputed values
    ds['dewpoint_corrected'] = ds['dewpoint'].copy()
    ds['dewpoint_corrected'].data = dewpoint

    ds['RH_derived_corrected'] = ds['RH_derived'].copy()
    ds['RH_derived_corrected'].data = RH_derived

In [None]:
# Time series of RH_derived_corrected
fig, ax = plt.subplots()

time_window = slice(time_start, time_stop)
point_style = dict(ls='None', marker='o', ms=1., alpha=0.5)

for PIPS_name in PIPS_to_comp:
    RH_derived_corrected = conv_ds_dict[PIPS_name]['RH_derived_corrected']
    RH_derived_corrected.sel(time=time_window).plot(ax=ax, label=f'{PIPS_name}_RH_derived_corrected',
                                                    **point_style)

# Optional second pass with larger markers (disabled):
# plt.gca().set_prop_cycle(None)

# for PIPS_name in PIPS_to_comp:
#     conv_ds = conv_ds_dict[PIPS_name]
#     conv_ds['RH_derived_corrected'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_RH_derived_cor', 
#                                                                     ls='None', marker='x', ms=5., alpha=0.75)

ax.legend()

In [None]:
# Plot the timeseries of dewpoint_corrected
fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    # (A stray trailing comma after this call used to wrap the result in a throwaway tuple)
    conv_ds['dewpoint_corrected'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_dewpoint')

ax.legend()

In [None]:
# Recompute some of the derived thermodynamic parameters (pt, qv, rho) using the new corrected vars

for PIPS_name in PIPS_to_comp:
    # Store the returned dataset back in the dict. Previously the result was only bound to a
    # loop-local name, so the new variables reached conv_ds_dict only if calc_thermo happens to
    # modify its argument in place.
    # NOTE(review): assumes pips.calc_thermo returns the updated dataset — confirm.
    conv_ds_dict[PIPS_name] = pips.calc_thermo(conv_ds_dict[PIPS_name], p_var='pressure_corrected',
                                               T_var='fasttemp_corrected',
                                               RH_var='RH_derived_corrected')

In [None]:
# Plot the timeseries of pt_corrected to check
fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    # (A stray trailing comma after this call used to wrap the result in a throwaway tuple)
    conv_ds['pt_corrected'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_pt')

ax.legend()

In [None]:
# Now resample the corrected timeseries to the parsivel times

corrected_varnames = ['fasttemp_corrected', 'slowtemp_corrected', 'RH_corrected', 'pressure_corrected',
                      'dewpoint_corrected', 'RH_derived_corrected', 'pt_corrected', 'qv_corrected',
                      'rho_corrected']

for PIPS_name in PIPS_to_comp:
    parsivel_ds = parsivel_ds_dict[PIPS_name]
    # The loading cell stores None when a Parsivel file could not be read; skip those probes
    # instead of failing on None['VD_matrix'].
    if parsivel_ds is None:
        print(f'No Parsivel dataset loaded for {PIPS_name}; skipping resampling')
        continue

    # Line the resampling bins up with the seconds value of the first Parsivel record
    PSD_datetimes = pips.get_PSD_datetimes(parsivel_ds['VD_matrix'])
    sec_offset = PSD_datetimes[0].second
    print(sec_offset)
    offset_str = pips.get_interval_str(sec_offset)

    conv_ds = conv_ds_dict[PIPS_name]
    for corrected_varname in corrected_varnames:
        corrected_var = conv_ds[corrected_varname]
        # Average the conventional data over each interval, labelling each bin by its right edge
        new_var = corrected_var.resample(time=intervalstr, label='right', closed='right',
                                         offset=offset_str).mean()

        parsivel_ds[corrected_varname] = new_var
        # Carry the attributes (e.g. bias_subtracted) over to the resampled variable
        parsivel_ds[corrected_varname].attrs = dict(corrected_var.attrs)

In [None]:
# Check the resampled pt_corrected now stored in the parsivel datasets
fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    parsivel_ds = parsivel_ds_dict[PIPS_name]
    # Probes whose Parsivel file failed to load are stored as None by the loading cell
    if parsivel_ds is None:
        continue
    parsivel_ds['pt_corrected'].plot(ax=ax, label=f'{PIPS_name}_pt_corrected',
                                     ls='None', marker='o', ms=1., alpha=0.75)
#     parsivel_ds_dict[PIPS_name]['RH_derived'].plot(ax=ax, label=f'{PIPS_name}_RH_derived', 
#                                                    ls='None', marker='x', ms=5., alpha=0.5)

ax.legend()

In [None]:
# Now save to new output directory
for PIPS_name, output_parsivel_filepath, output_conv_filepath in zip(PIPS_names,
                                                                     output_parsivel_filepaths,
                                                                     output_conv_filepaths):
    # Only the probes that were actually processed get written out
    if PIPS_name not in PIPS_to_comp:
        continue
    print(PIPS_name)

    parsivel_ds = parsivel_ds_dict[PIPS_name]
    if parsivel_ds is None:
        # The loading cell stores None when the Parsivel file could not be read; the
        # conventional file is still written below.
        print(f"No Parsivel dataset for {PIPS_name}; not writing {output_parsivel_filepath}")
    else:
        print("Saving {}".format(output_parsivel_filepath))
        parsivel_ds.to_netcdf(output_parsivel_filepath)

    print("Saving {}".format(output_conv_filepath))
    conv_ds_dict[PIPS_name].to_netcdf(output_conv_filepath)

In [None]:
# Last step: list the offsets found for the directly measured variables, so that another
# notebook can apply them to the IOP data

corrected_vars = ['fasttemp_corrected', 'slowtemp_corrected', 'RH_corrected', 'pressure_corrected']

print("Biases to subtract:")

for PIPS_name in PIPS_to_comp:
    print(PIPS_name)
    ds = conv_ds_dict[PIPS_name]
    for corrected_var in corrected_vars:
        # bias_subtracted is the attribute recorded when each corrected variable was created
        bias = ds[corrected_var].bias_subtracted
        print(f'{corrected_var}: {bias!s}')

In [None]:
# OLD CELLS BELOW

In [None]:
# All the PIPS show high freq variations in slowT of roughly 0.5 deg C for some weird reason.
# You can see that here
fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    conv_ds['slowtemp'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_slowT',
                                                                    ls='None', marker='o', ms=1., alpha=0.5)

# Build the legend once, after every series has been added, instead of on each iteration
ax.legend(loc='best')


In [None]:
# To clean up the slow temp, try a Butterworth low-pass filter (a solution suggested by ChatGPT)

from scipy.signal import butter, filtfilt

# Template for running this on another dataset or on synthetic data:
# ds = xr.open_dataset('path_to_your_dataset.nc')
# time = pd.date_range('2000-01-01', periods=200, freq='D')
# data = np.random.rand(200)  # Random data for example
# ds = xr.DataArray(data, coords=[time], dims=['time'])

# Test on PIPS3A
conv_ds = conv_ds_dict['PIPS3A']
slow_temp = conv_ds['slowtemp']

# Filter design parameters
order = 6  # Filter order
sampling_rate = 1  # Inverse of the sampling interval in units of the data
cutoff_frequency = 0.005  # Change as needed (units are in inverse seconds)

# butter() is given the cutoff as a fraction of the Nyquist rate
nyquist_rate = 0.5 * sampling_rate
normalized_cutoff = cutoff_frequency / nyquist_rate
print(normalized_cutoff)

b, a = butter(order, normalized_cutoff, btype='low', analog=False)

# Apply the filter to the raw values.
# NOTE(review): presumably the series contains no NaNs, since filtfilt does not skip them — confirm
filtered_slow_temp = filtfilt(b, a, slow_temp.values)

# Wrap the filtered values in a DataArray that shares the original coordinates and dimensions
slow_temp_filt = xr.DataArray(filtered_slow_temp, coords=slow_temp.coords, dims=slow_temp.dims)



In [None]:
# Now plot the filtered data on top of the raw data to see what it looks like.
# slow_temp and slow_temp_filt come from the PIPS3A dataset, so the labels name that probe explicitly
# rather than relying on whatever value PIPS_name was left with by an earlier loop.
fig, ax = plt.subplots()
slow_temp_filt.sel(time=slice(time_start, time_stop)).plot(ax=ax, label='PIPS3A_slowT_filt',
                                                           ls='None', marker='o', ms=1., alpha=0.5)
# The raw series gets its own label (it used to share the '_slowT_filt' label with the filtered one)
slow_temp.sel(time=slice(time_start, time_stop)).plot(ax=ax, label='PIPS3A_slowT',
                                                      ls='None', marker='o', ms=1., alpha=0.5)

# Show the labels so the two series can be told apart
ax.legend()

In [None]:
# The Butterworth filter is better, but still doesn't quite do what I want: the cutoff frequency has to be
# set too low. Let's try a simple running mean instead.

# Centered running mean over a window of 60 samples (60 s if the data are sampled once per second -- TODO confirm)
window_size = 60
rolling_slow_temp = slow_temp.rolling(time=window_size, center=True)
smoothed_slow_temp = rolling_slow_temp.mean()

In [None]:
# Plot the running-mean slowtemp on top of the raw slowtemp.
# Both series derive from the PIPS3A dataset, so the labels name that probe explicitly
# rather than relying on whatever value PIPS_name was left with by an earlier loop.
fig, ax = plt.subplots()
smoothed_slow_temp.sel(time=slice(time_start, time_stop)).plot(ax=ax, label='PIPS3A_slowT_filt',
                                                               ls='None', marker='o', ms=1., alpha=0.5)
# The raw series gets its own label (it used to share the '_slowT_filt' label with the smoothed one)
slow_temp.sel(time=slice(time_start, time_stop)).plot(ax=ax, label='PIPS3A_slowT',
                                                      ls='None', marker='o', ms=1., alpha=0.5)

# Show the labels so the two series can be told apart
ax.legend()

In [None]:
# Overlay the slowtemp (dots) and fasttemp (crosses) time series of every compared PIPS
fig, ax = plt.subplots()
temp_plot_times = slice(time_start, time_stop)

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    slowT_in_window = conv_ds['slowtemp'].sel(time=temp_plot_times)
    slowT_in_window.plot(ax=ax, label=f'{PIPS_name}_slowT', ls='None', marker='o', ms=1., alpha=0.5)

# Restart the property (color) cycle before the second pass over the same PIPS list
plt.gca().set_prop_cycle(None)

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    fastT_in_window = conv_ds['fasttemp'].sel(time=temp_plot_times)
    fastT_in_window.plot(ax=ax, label=f'{PIPS_name}_fastT', ls='None', marker='x', ms=5., alpha=0.75)

ax.legend()

In [None]:
# It looks like 3A and 3B have wonky slow-temp sensors that read too high. Take the difference between the
# slow temp and fast temp obs (slowtemp - fasttemp) for each PIPS so the average difference can be found.

diff_T_dict = {
    PIPS_name: conv_ds_dict[PIPS_name]['slowtemp'] - conv_ds_dict[PIPS_name]['fasttemp']
    for PIPS_name in PIPS_to_comp
}


In [None]:
# Time series of the slowtemp - fasttemp differences, one series per PIPS

fig, ax = plt.subplots()
diff_plot_times = slice(time_start, time_stop)

for PIPS_name in PIPS_to_comp:
    diff_T_in_window = diff_T_dict[PIPS_name].sel(time=diff_plot_times)
    diff_T_in_window.plot(ax=ax, label=f'{PIPS_name}_diff_T', ls='None', marker='o', ms=1., alpha=0.5)

ax.legend()

In [None]:
# Compute the mean slowtemp - fasttemp difference of each PIPS and subtract it from the original slowtemp,
# storing the result as a new 'slowtemp_corrected' variable

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    # Mean over the whole difference series
    mean_diff_T = diff_T_dict[PIPS_name].mean().values
    print(mean_diff_T)
    # The subtraction yields a new array, so the original slowtemp is left untouched
    conv_ds['slowtemp_corrected'] = conv_ds['slowtemp'] - mean_diff_T
    # Record the subtracted value in the attributes so we know the variable was modified
    conv_ds['slowtemp_corrected'].attrs['bias_subtracted'] = mean_diff_T
    


In [None]:
# Ok, now plot the bias-corrected slowtemps together with the fasttemps.
# Only the first entry of PIPS_to_comp is plotted.

fig, ax = plt.subplots()
corrected_plot_times = slice(time_start, time_stop)
first_PIPS_only = PIPS_to_comp[:1]

for PIPS_name in first_PIPS_only:
    conv_ds = conv_ds_dict[PIPS_name]
    slowT_corrected_in_window = conv_ds['slowtemp_corrected'].sel(time=corrected_plot_times)
    slowT_corrected_in_window.plot(ax=ax, label=f'{PIPS_name}_slowT', ls='None', marker='o', ms=1., alpha=0.5)

# Restart the property (color) cycle before the second pass over the same PIPS list
plt.gca().set_prop_cycle(None)

for PIPS_name in first_PIPS_only:
    conv_ds = conv_ds_dict[PIPS_name]
    fastT_in_window = conv_ds['fasttemp'].sel(time=corrected_plot_times)
    fastT_in_window.plot(ax=ax, label=f'{PIPS_name}_fastT', ls='None', marker='x', ms=5., alpha=0.75)

ax.legend()

In [None]:
# Now, we need to recompute the dewpoint and RH_derived using the corrected slowtemps.
# This cell only computes and stores the new variables: the stray dewpoint plot that used to be here drew
# onto the `ax` left over from the previous cell's figure, and has been removed (the corrected dewpoint is
# plotted in its own cell further down).

for PIPS_name in PIPS_to_comp:
    pressure = conv_ds_dict[PIPS_name]['pressure']
    slowtemp = conv_ds_dict[PIPS_name]['slowtemp_corrected']
    fasttemp = conv_ds_dict[PIPS_name]['fasttemp']
    RH = conv_ds_dict[PIPS_name]['RH']
    # NOTE(review): the scalings presumably convert hPa -> Pa, deg C -> K and % -> fraction for the
    # thermo routines, and back again for the results -- confirm against pyPIPS.thermolib
    dewpoint = thermo.calTdfromRH(pressure * 100., slowtemp + 273.15, RH / 100.) - 273.15
    # RH_derived combines the (uncorrected) fasttemp with the dewpoint recomputed above
    RH_derived = thermo.calRH(pressure * 100., fasttemp + 273.15, dewpoint + 273.15) * 100.

    # Start from copies of the existing variables and overwrite only their data with the recomputed values
    conv_ds_dict[PIPS_name]['dewpoint_corrected'] = conv_ds_dict[PIPS_name]['dewpoint'].copy()
    conv_ds_dict[PIPS_name]['dewpoint_corrected'].data = dewpoint

    conv_ds_dict[PIPS_name]['RH_derived_corrected'] = conv_ds_dict[PIPS_name]['RH_derived'].copy()
    conv_ds_dict[PIPS_name]['RH_derived_corrected'].data = RH_derived


In [None]:
# Overlay the original RH_derived (dots) and the recomputed RH_derived_corrected (crosses) for every PIPS
fig, ax = plt.subplots()
RH_plot_times = slice(time_start, time_stop)

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    RH_derived_in_window = conv_ds['RH_derived'].sel(time=RH_plot_times)
    RH_derived_in_window.plot(ax=ax, label=f'{PIPS_name}_RH_derived', ls='None', marker='o', ms=1., alpha=0.5)

# Restart the property (color) cycle before the second pass over the same PIPS list
plt.gca().set_prop_cycle(None)

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    RH_derived_cor_in_window = conv_ds['RH_derived_corrected'].sel(time=RH_plot_times)
    RH_derived_cor_in_window.plot(ax=ax, label=f'{PIPS_name}_RH_derived_cor',
                                  ls='None', marker='x', ms=5., alpha=0.75)

ax.legend()

In [None]:
# Plot the time series of the corrected dewpoint for every compared PIPS
fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    conv_ds = conv_ds_dict[PIPS_name]
    # (a stray trailing comma after this call used to turn the statement into a throwaway tuple)
    conv_ds['dewpoint_corrected'].sel(time=slice(time_start, time_stop)).plot(ax=ax, label=f'{PIPS_name}_dewpoint')

ax.legend()

In [None]:
# Now resample the corrected slowtemp, dewpoint, and RH_derived to the parsivel times and store the
# results in the parsivel datasets under the same variable names

for PIPS_name in PIPS_to_comp:
    PSD_datetimes = pips.get_PSD_datetimes(parsivel_ds_dict[PIPS_name]['VD_matrix'])
    # Seconds value of the first PSD time, used as the offset of the resampling bins
    sec_offset = PSD_datetimes[0].second
    print(sec_offset)
    offset_str = pips.get_interval_str(sec_offset)

    for corrected_name in ('slowtemp_corrected', 'dewpoint_corrected', 'RH_derived_corrected'):
        # Right-closed, right-labelled bins of length intervalstr, averaged over each bin
        resampler = conv_ds_dict[PIPS_name][corrected_name].resample(time=intervalstr, label='right',
                                                                     closed='right', offset=offset_str)
        parsivel_ds_dict[PIPS_name][corrected_name] = resampler.mean()


In [None]:
# Compare the resampled RH_derived_corrected (dots) with the original RH_derived (crosses)
# in the parsivel datasets
fig, ax = plt.subplots()

for PIPS_name in PIPS_to_comp:
    parsivel_ds = parsivel_ds_dict[PIPS_name]
    parsivel_ds['RH_derived_corrected'].plot(ax=ax, label=f'{PIPS_name}_RH_derived_corrected',
                                             ls='None', marker='o', ms=1., alpha=0.75)
    parsivel_ds['RH_derived'].plot(ax=ax, label=f'{PIPS_name}_RH_derived',
                                   ls='None', marker='x', ms=5., alpha=0.5)

ax.legend()

In [None]:
# Write the parsivel and conventional datasets of every compared PIPS to the new output directory
for PIPS_name, output_parsivel_filepath, output_conv_filepath in zip(PIPS_names,
                                                                     output_parsivel_filepaths,
                                                                     output_conv_filepaths):
    # Only the PIPS that took part in the comparison get written out
    if PIPS_name not in PIPS_to_comp:
        continue

    print(PIPS_name)

    # Parsivel dataset first, then the conventional dataset (same order as the paths in the zip above)
    for ds_dict, output_filepath in ((parsivel_ds_dict, output_parsivel_filepath),
                                     (conv_ds_dict, output_conv_filepath)):
        print("Saving {}".format(output_filepath))
        ds_dict[PIPS_name].to_netcdf(output_filepath)