In [12]:
from pathlib import Path
import xarray as xr
import numpy as np

# in and out file paths 
original_data_dir = Path('data/original_data')

in_file = original_data_dir / 'CTD_all_1876-2019.nc'

transformed_data_dir = Path('data/transformed_data')

out_file = transformed_data_dir / 'CTD_all_1876-2019.nc'


#load in dataset
xrds = xr.open_dataset(in_file)

print(xrds)

<xarray.Dataset> Size: 13GB
Dimensions:      (PRES: 7225, TIME: 76708, CALIBRATION: 2)
Coordinates:
  * PRES         (PRES) float64 58kB 1.0 2.0 3.0 ... 7.224e+03 7.225e+03
  * TIME         (TIME) datetime64[ns] 614kB 1876-07-10T00:00:01.000004608 .....
    CALIBRATION  (CALIBRATION, TIME) float64 1MB ...
Data variables:
    STATION      (TIME) |S25 2MB ...
    LATITUDE     (TIME) float64 614kB ...
    LONGITUDE    (TIME) float64 614kB ...
    TEMP         (TIME, PRES) float64 4GB ...
    PSAL         (TIME, PRES) float64 4GB ...
    CNDC         (TIME, PRES) float64 4GB ...
    FDEP         (TIME) float64 614kB ...
    SHIP         (TIME) |S52 4MB ...
    SHIPID       (TIME) float32 307kB ...
    OWNER        (TIME) |S31 2MB ...
    CRUISE       (TIME) |S47 4MB ...
    TYPE         (TIME) |S47 4MB ...
Attributes: (12/38)
    cruise_ID:                     
    instrument:                    Shipborne observation
    institution:                   The University Centre in Svalbard, Nor

In [13]:


# Generate a unique integer CAST value for each time point
cast_values = np.arange(1, len(xrds['TIME']) + 1)

# Create a new CAST dimension with integer values
xrds = xrds.rename({'TIME': 'CAST'})

# Move the original TIME to a data variable that is CAST dependent
xrds['TIME'] = (('CAST',), xrds['CAST'].values)

# Assign integer CAST values
xrds['CAST'] = (('CAST',), cast_values)

# Modify the dimensions of data variables to be dependent on 'CAST' instead of 'TIME'
data_vars_to_reshape = [var for var in xrds.data_vars if 'CAST' in xrds[var].dims]

for var in data_vars_to_reshape:
    xrds[var] = xrds[var].rename({'CAST': 'CAST'})

# Save the modified dataset to a new NetCDF file
# xrds.to_netcdf('output.nc')
print(xrds['CAST'])
print(cast_values[:10])
print("The NetCDF file has been successfully modified and saved as 'output.nc'.")

<xarray.DataArray 'CAST' (CAST: 76708)> Size: 614kB
array([    1,     2,     3, ..., 66734, 66735, 66736])
Coordinates:
  * CAST     (CAST) int64 614kB 1 2 3 4 5 5 ... 66732 66733 66734 66735 66736
[ 1  2  3  4  5  6  7  8  9 10]
The NetCDF file has been successfully modified and saved as 'output.nc'.
