In [1]:
# Script for importing netcdf, finding optimal shuffle settings for encoding, and then saving the dataset with these settings
import numpy as np
import xarray as xr
import os   
import copy
import re
import datetime

In [2]:
# To change
# drop time_string
# radius = double(radius)/1000 (convert to meters), add scale factor
# time_num: chunk
# change metadata radius
# try to add time coordinate, otherwise change metadata time_num
# check first and last time: in open air?
# make beam_angle 720*16, drop ini_beam_angle

# To try
# re-order dimensions

In [None]:
# --- Input / output files ---
map_file_in = r'O:\HybridDune experiment\2024-12-18 to 2024-12-20, Storm 1\Lidars\20241220_LiDAR1\storm1_lidar1_polar_10sInterval - Copy2.nc'
# NOTE(review): the output file name starts with "sstorm1" (double s) while the input starts with "storm1" -- confirm this is intended.
map_file_out = r'O:\HybridDune experiment\2024-12-18 to 2024-12-20, Storm 1\Lidars\20241220_LiDAR1\sstorm1_lidar1_polar_10sInterval - new2.nc'

# Open the source file; chunks="auto" lets xarray choose the chunking
ds = xr.open_dataset(map_file_in, chunks="auto")

# --- radius_lidar: cast to float64 and divide by 1000 so the values are in meters ---
# The variable is replaced by the result of the arithmetic, so its attributes are
# written out again in full right after (with the unit corrected to 'm').
ds['radius_lidar'] = ds['radius_lidar'].astype('float64') / 1000
ds['radius_lidar'].attrs = {
    # CF-style key 'long_name' (underscore); attribute names containing a space are not CF compliant
    'long_name': 'distance (radius) from lidar to point',
    'units': 'm',
    'comment': 'radius, part of polar coordinates of points. Polar angles per point can be calculated from the profile_angle and beam_angle',
    'dimensions': 'T x 720 x 16 x 3 (Matlab) or reverse order (Python XArray), for number of point clouds x (360°x0.5° angular resolution) x 16 profiles x 3 echos. The lidar registers at most 3 echos at each angle, usually less.',
}

# Rename 'time' to 't', then replace the values of t with those stored in time_num_int64
ds = ds.rename({'time': 't'})
ds = ds.assign_coords(t=ds.time_num_int64.values)

# Set the attribute only AFTER assign_coords: assigning raw values builds a new
# coordinate variable without attributes, so a comment set before that call is lost.
ds['t'].attrs['comment'] = 'UTC+1: local wintertime'

# --- beam_angle: full angle grid, replacing ini_beam_angle ---
# Every profile starts at its own ini_beam_angle and advances in 0.5 degree steps.
angles = np.arange(0, 360, 0.5)               # 720 offsets: 0, 0.5, ..., 359.5
ini_beam_angle = ds['ini_beam_angle'].values  # assumed 1-D, one start angle per profile -- TODO confirm

# Broadcasting (720, 1) + (1, n_profiles) yields the same (720, n_profiles) matrix that the
# two np.repeat calls produced, without hard-coding the number of angles or profiles.
beam_angle = angles[:, np.newaxis] + ini_beam_angle[np.newaxis, :]

# Stored transposed, i.e. as (profile_number, obs_number); this adds a data variable to ds
ds['beam_angle'] = (('profile_number', 'obs_number'), beam_angle.T)
ds['beam_angle'].attrs = {
    # CF-style key 'long_name' (underscore); attribute names containing a space are not CF compliant
    'long_name': 'angle of the lidar beam',
    'units': 'degrees',
    'comment': 'the angular resolution is 0.5°. So every next measurement within a profile is exactly 0.5 degrees later. The angle of the first point differs slightly between profiles. Angles are identical between epochs (i.e. over time).',
}

# Remove the variables that should not end up in the output file
# (time_string, both time_num variants and ini_beam_angle) in a single call
obsolete_vars = ['time_string', 'time_num', 'time_num_int64', 'ini_beam_angle']
ds = ds.drop_vars(obsolete_vars)

# --- Write the dataset with per-variable encoding ---
# Chunk length along t: capped at 28800 (2 hrs of 4hz data), or the whole series when it is shorter
chunksize_t = np.min([28800, ds.sizes['t']])
chunk_shape = (1, 1, 120, chunksize_t)

# Integer storage (with scale factor / fill value) and explicit chunking for the two listed variables
encoding = {
    'radius_lidar': {'scale_factor': 0.001, 'dtype': 'uint16', '_FillValue': 0, 'chunksizes': chunk_shape},
    'intensity': {'dtype': 'uint8', '_FillValue': 0, 'chunksizes': chunk_shape},
}

# Every data variable and coordinate gets zlib compression (level 4);
# variables that already have an entry above keep their settings and gain the compression keys.
for name in [*ds.data_vars, *ds.coords]:
    encoding.setdefault(name, {}).update({"zlib": True, "complevel": 4})

ds.to_netcdf(map_file_out, encoding=encoding, compute=True)



  chunksize_t = np.min([28800, ds.dims['t']])  # set chunk size for time dimension, max 28800 (2 hrs of 4hz data), unless total datasize is smaller
