# A notebook to copy and clean up PIPS netCDF files for upload to the PERiLS EOL repository

In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import numpy.ma as ma
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.ticker as ticker
import matplotlib.dates as dates
from mpl_toolkits.axes_grid1 import ImageGrid,make_axes_locatable,host_subplot
#from mpl_toolkits.basemap import Basemap
from datetime import datetime, timedelta
import sys
import os
import pyPIPS.utils as utils
import pyPIPS.thermolib as thermo
import pyPIPS.DSDlib as dsd
#import pyPIPS.disdrometer_module as dis
import pyPIPS.plotmodule as PIPSplot
#import pyPIPS.simulator as sim
import pyPIPS.pips_io as pipsio
import pyPIPS.PIPS as pips
import pyPIPS.parsivel_params as pp
import pyPIPS.parsivel_qc as pqc
import pyPIPS.polarimetric as dualpol
#from pyCRMtools.modules import plotmodule as plotmod
from pyCRMtools.modules import utils as CRMutils
# from pyCRMtools.pycaps import arps_read
# from pyCRMtools.pycaps import pycaps_fields
# from pyCRMtools.pycaps import calvars_radar as radar
import pandas as pd
import xarray as xr
import glob
import numpy.random as random
from scipy.stats import gamma, uniform
from scipy.special import gamma as gammafunc
from scipy import ndimage
from metpy.plots import StationPlot
from metpy.calc import wind_components
from metpy.cbook import get_test_data
from metpy.plots import StationPlot
from metpy.plots.wx_symbols import current_weather, sky_cover
from metpy.units import units
import warnings
warnings.simplefilter('ignore')
%matplotlib notebook

In [None]:
# Read in the original PIPS netcdf files
#
# This cell builds the input/output file paths for every PIPS in the chosen deployment and loads
# the parsivel and conventional datasets into two dictionaries keyed by PIPS name.

# Base directories for the 2022 deployments (alternative; uncomment to use instead of the 2023 paths)
# PIPS_input_base_dir = '/Users/dawson29/Projects/PERiLS/obsdata/2022/PIPS_data/'
# PIPS_output_base_dir = '/Users/dawson29/Projects/PERiLS/obsdata/2022/PIPS_data_for_EOL/'

# Base directories for the 2023 deployments
PIPS_input_base_dir = '/Users/dawson29/PIPS_data/2023/'
PIPS_output_base_dir = '/Users/dawson29/PIPS_data/2023_trimmed/'

deployment_name = 'IOP3_032423' # 'IOP3_040522'
PIPS_input_dir = os.path.join(PIPS_input_base_dir, deployment_name, 'netcdf')
PIPS_output_dir = os.path.join(PIPS_output_base_dir, deployment_name, 'netcdf')
# exist_ok=True makes the cell safely re-runnable without a separate existence check
os.makedirs(PIPS_output_dir, exist_ok=True)

# IOP1 2022
# PIPS_names = ['PIPS1A', 'PIPS1B', 'PIPS2A', 'PIPS2B']
# IOP2 2022
# PIPS_names = ['PIPS1A', 'PIPS1B', 'PIPS2A', 'PIPS3B']
# IOP3 2022
# PIPS_names = ['PIPS1A', 'PIPS1B', 'PIPS2A', 'PIPS2B', 'PIPS3A', 'PIPS3B']
# IOP3 2023
PIPS_names = ['PIPS2A', 'PIPS3A']
# Interval used in the parsivel file names (the "<N>s" suffix)
parsivel_interval = 10

# Input and output files share the same file names; only the directory differs
parsivel_filenames = ['parsivel_combined_{}_{}_{:d}s.nc'.format(deployment_name, PIPS_name, parsivel_interval)
                      for PIPS_name in PIPS_names]
parsivel_filepaths = [os.path.join(PIPS_input_dir, parsivel_filename) for parsivel_filename in parsivel_filenames]
output_parsivel_filepaths = [os.path.join(PIPS_output_dir, parsivel_filename)
                             for parsivel_filename in parsivel_filenames]
conv_filenames = ['conventional_raw_{}_{}.nc'.format(deployment_name, PIPS_name) for PIPS_name in PIPS_names]
conv_filepaths = [os.path.join(PIPS_input_dir, conv_filename) for conv_filename in conv_filenames]
output_conv_filepaths = [os.path.join(PIPS_output_dir, conv_filename) for conv_filename in conv_filenames]

# Load every dataset fully into memory, keyed by PIPS name
parsivel_ds_dict = {}
conv_ds_dict = {}
for PIPS_name, parsivel_filepath, conv_filepath in zip(PIPS_names, parsivel_filepaths, conv_filepaths):
    parsivel_ds_dict[PIPS_name] = xr.load_dataset(parsivel_filepath)
    conv_ds_dict[PIPS_name] = xr.load_dataset(conv_filepath)

In [None]:
# Name of the PIPS inspected in the following cells; it is used as the key into
# parsivel_ds_dict and conv_ds_dict, so it must be one of the entries in PIPS_names.
PIPS_to_check = 'PIPS3A'

In [None]:
# Pick out the parsivel dataset for the PIPS under inspection and display it
parsivel_ds = parsivel_ds_dict[PIPS_to_check]
parsivel_ds

In [None]:
# Pick out the conventional dataset for the PIPS under inspection and display it
conv_ds = conv_ds_dict[PIPS_to_check]
conv_ds

In [None]:
# Scatter plot of the GPS_lon / GPS_lat variables of the conventional dataset
# NOTE(review): presumably used to spot when the probe was being moved — confirm
conv_ds.plot.scatter(x='GPS_lon', y='GPS_lat')

In [None]:
# Plot the timeseries of compass headings
# Both the conventional and the parsivel 'compass_dir' series are drawn with the default
# xarray line plot so they can be compared by eye.
conv_ds['compass_dir'].plot()
parsivel_ds['compass_dir'].plot()

In [None]:
# First, coarse cut of the record: the window below was picked by eye from the compass-heading
# plot and is applied identically to the conventional and parsivel datasets being inspected.
start_time = '2023-03-24T23:00'
end_time = '2023-03-25T03:00'
initial_window = slice(start_time, end_time)

conv_ds, parsivel_ds = (ds.sel(time=initial_window) for ds in (conv_ds, parsivel_ds))


In [None]:
# Plot the timeseries of compass headings again after first cut
# conv_ds and parsivel_ds now only hold the times kept by the initial manual cut.
conv_ds['compass_dir'].plot()
parsivel_ds['compass_dir'].plot()

In [None]:
# The following cells target PIPS3A / PIPS3B, whose compass direction shows erroneous large
# fluctuations that need to be removed.
# Lower/upper acceptance bounds are the 0.054 and 0.999 quantiles of the conventional heading.
compass_dir_conv = conv_ds['compass_dir']
threshold_low, threshold_high = (compass_dir_conv.quantile(q) for q in (0.054, 0.999))
mean = compass_dir_conv.mean()
median = compass_dir_conv.median()
print(mean, median)

In [None]:
# Display the lower compass-direction bound (0.054 quantile of the conventional 'compass_dir')
threshold_low

In [None]:
# Display the upper compass-direction bound (0.999 quantile of the conventional 'compass_dir')
threshold_high

In [None]:
# Keep only the headings whose magnitude lies within [threshold_low, threshold_high];
# .where() turns every sample outside that band into NaN.
compass_dir_conv = conv_ds['compass_dir']
compass_dir_mag = abs(compass_dir_conv)
within_band = (compass_dir_mag >= threshold_low) & (compass_dir_mag <= threshold_high)
mask = compass_dir_conv.where(within_band)
print(mask)
# Mean of the retained samples, used below as the replacement value for the rejected ones
mean_value = mask.mean().values
print(mean_value)
# Replace the NaN (outlier) samples with that mean
mask = mask.fillna(mean_value)
print(mask)
mask.plot()

In [None]:
# Zoom in on the start of the record (conventional samples 0 through 300) when a closer look
# at the beginning of the deployment is needed.
conv_times = conv_ds['time']
start_time, end_time = conv_times[0].values, conv_times[300].values
print(start_time, end_time)
zoom_window = slice(start_time, end_time)
conv_ds['compass_dir'].sel(time=zoom_window).plot()
parsivel_ds['compass_dir'].sel(time=zoom_window).plot(marker='o')

In [None]:
# Zoom in on the tail of the record (the last 300 conventional samples) when a closer look
# at the end of the deployment is needed.
conv_times = conv_ds['time']
start_time, end_time = conv_times[-300].values, conv_times[-1].values
print(start_time, end_time)
zoom_window = slice(start_time, end_time)
conv_ds['compass_dir'].sel(time=zoom_window).plot()
parsivel_ds['compass_dir'].sel(time=zoom_window).plot(marker='o')

In [None]:
# Show the first and last timestamps of the parsivel dataset currently being inspected
parsivel_times = parsivel_ds['time']
for edge_index in (0, -1):
    print(parsivel_times[edge_index])

In [None]:
# New start and end times for deployments (determined manually by looking at compass heading variability)
# The trimming cell zips these lists with PIPS_names, so each list must contain one entry per
# PIPS, in the same order as PIPS_names. The commented blocks are the values used for the
# other deployments; enable the pair that matches the active deployment_name / PIPS_names.
# IOP#1 03/22/22
# new_start_times = ['2022-03-22T19:43:12.000000000', '2022-03-22T19:57:12.000000000',
#                    '2022-03-22T19:24:32.000000000', '2022-03-22T19:34:43.000000000']
# new_end_times = ['2022-03-22T21:36:52.000000000', '2022-03-22T21:41:52.000000000',
#                  '2022-03-22T21:24:42.000000000', '2022-03-22T21:30:43.000000000']
# IOP#2 03/30/22
# new_start_times = ['2022-03-30T23:49:53.000000000', '2022-03-30T23:49:53.000000000',
#                    '2022-03-30T23:59:03.000000000', '2022-03-31T00:00:29.000000000']
# new_end_times = ['2022-03-31T01:25:23.000000000', '2022-03-31T01:25:23.000000000',
#                  '2022-03-31T01:11:43.000000000', '2022-03-31T01:14:09.000000000']
# IOP#3 04/05/22
# new_start_times = ['2022-04-05T14:46:13.000000000', '2022-04-05T14:38:00.000000000',
#                    '2022-04-05T15:07:13.000000000', '2022-04-05T14:57:44.000000000',
#                    '2022-04-05T16:00:29.000000000', '2022-04-05T15:21:39.000000000']
# new_end_times = ['2022-04-05T16:51:23.000000000', '2022-04-05T16:45:40.000000000',
#                  '2022-04-05T17:02:13.000000000', '2022-04-05T16:58:24.000000000',
#                  '2022-04-05T17:15:29.000000000', '2022-04-05T17:14:49.000000000']

# IOP#3 03/24/23
new_start_times = ['2023-03-24T23:40:49.000000000', '2023-03-24T23:35:19.000000000']
new_end_times = ['2023-03-25T02:50:59.000000000', '2023-03-25T02:41:59.000000000']

In [None]:
# Trim every PIPS to its own deployment window.
# The start/end lists are paired positionally with PIPS_names.
trim_windows = {
    PIPS_name: slice(new_start_time, new_end_time)
    for PIPS_name, new_start_time, new_end_time in zip(PIPS_names, new_start_times, new_end_times)
}

# Apply the same window to the parsivel and the conventional dataset of each PIPS
output_parsivel_ds_dict = {
    PIPS_name: parsivel_ds_dict[PIPS_name].sel(time=window) for PIPS_name, window in trim_windows.items()
}
output_conv_ds_dict = {
    PIPS_name: conv_ds_dict[PIPS_name].sel(time=window) for PIPS_name, window in trim_windows.items()
}
    

In [None]:
# Now go through and update CF encoding string for start time as well as the global attributes
#
# This cell works on the *trimmed* datasets produced by the trimming cell. The output
# dictionaries must not be rebound to parsivel_ds_dict / conv_ds_dict here: doing so throws
# away the trimming, so the untrimmed records would be written out and the in-memory
# originals would have their attributes/encoding modified.
attr_time_format = '%Y%m%d%H%M%S'

for PIPS_name in PIPS_names:
    trimmed_parsivel_ds = output_parsivel_ds_dict[PIPS_name]
    trimmed_conv_ds = output_conv_ds_dict[PIPS_name]

    # The first/last parsivel timestamps define the reference times for BOTH files of this PIPS
    start_datetime = pd.to_datetime(trimmed_parsivel_ds.time[0].values)
    end_datetime = pd.to_datetime(trimmed_parsivel_ds.time[-1].values)
    start_time_str = start_datetime.strftime('%Y-%m-%d %H:%M:%S')
    time_units = "Seconds since {}".format(start_time_str)

    for trimmed_ds in (trimmed_parsivel_ds, trimmed_conv_ds):
        # Units string used when the time coordinate is encoded on write
        trimmed_ds.time.encoding['units'] = time_units
        # Also update the global attributes for start and end times accordingly
        trimmed_ds.attrs['starting_time'] = start_datetime.strftime(attr_time_format)
        trimmed_ds.attrs['ending_time'] = end_datetime.strftime(attr_time_format)
    

In [None]:
# Write each PIPS' output datasets to the new output directory:
# parsivel file first, then the conventional file.
for PIPS_name, parsivel_out_path, conv_out_path in zip(PIPS_names,
                                                       output_parsivel_filepaths,
                                                       output_conv_filepaths):
    targets = ((output_parsivel_ds_dict, parsivel_out_path),
               (output_conv_ds_dict, conv_out_path))
    for ds_dict, out_path in targets:
        print("Saving {}".format(out_path))
        ds_dict[PIPS_name].to_netcdf(out_path)

In [None]:
# Remove some attributes that are no longer needed
# for parsivel_filename in parsivel_filenames:
#     print("Removing unneeded attributes for {}".format(parsivel_filename))
#     parsivel_ds = xr.load_dataset(parsivel_filename)
#     # del parsivel_ds.attrs['CG_coeff_SATP_new']
#     # del parsivel_ds.attrs['CG_coeff_TMM_F_new']
#     del parsivel_ds.attrs['CG_coeff']
#     parsivel_ds.to_netcdf(parsivel_filename)