In [None]:
import glob
import os.path

import netCDF4
import numpy

In [None]:
# Names under which the depth and time coordinates are looked up in the input
# NetCDF files; the same names are reused for the dimensions/variables written
# to the output files.
depth_name = "deptht"
time_name = "time_average_5d"

In [None]:
# Step 1: combine yearly NetCDF grid/ptrc files into one complete forcing file
# This also adds a variable for cell thicknesses

# Thickness of each model layer; written to the combined file as variable 'h'
# with units 'm' (presumably ordered from the surface downwards - TODO confirm).
# The values are parsed with a whitespace-agnostic split(), so tabs, spaces or
# line breaks between the numbers are all accepted.
thickness = numpy.array("""
      1.0200     1.0800     1.1500     1.2300     1.3400
      1.4700     1.6300     1.8300     2.0800     2.3700
      2.7100     3.1100     3.5600     4.0500     4.5900
      5.1500     5.7300     6.3300     6.9500     7.5800
      8.2400     8.9400     9.7000    10.5300    11.4600
     12.5000    13.6800    15.0100    16.5400    18.2700
     20.2500    22.5000    25.0500    27.9400    31.1900
     34.8300    38.8900    43.3900    48.3500    53.7600
     59.6200    65.9200    72.6100    79.6600    87.0000
     94.5600   102.2600   110.0100   117.7100   125.2900
    132.6400   139.7100   146.4300   152.7500   158.6400
    164.0800   169.0600   173.5800   177.6700   181.3300
    184.6000   187.5000   190.0600   192.3100   194.2900
    196.0200   197.5300   198.8400   199.9800   200.9700
    201.8300   202.5700   203.2000   203.7500   204.2300
""".split(), dtype=float)

def combine(dir_path, path_out, last_year=None):
    """Merge the yearly ``*grid_T.nc`` / ptrc files of one directory into a single NetCDF file.

    Grid files are processed in sorted path order. ``votemper`` is read from
    every grid file, and ``PHN``, ``PHD``, ``ZMI`` and ``ZME`` from the matching
    ptrc file (same base name with ``_grid_`` replaced by ``_ptrc_``). Only
    index 0 of the last two axes of each variable is kept (presumably the
    single horizontal point of the extract - TODO confirm).

    The output file contains the concatenated time coordinate, the depth
    coordinate taken from the first grid file, the module-level ``thickness``
    array as variable ``h``, and the five concatenated variables with
    dimensions (time, depth).

    Args:
        dir_path: directory containing the yearly input files.
        path_out: path of the NetCDF file to write (opened in ``'w'`` mode).
        last_year: if given, stop after the grid file whose name ends with
            ``<last_year>_grid_T.nc`` (year formatted with 4 digits).

    Raises:
        FileNotFoundError: if ``dir_path`` contains no ``*grid_T.nc`` file.
        ValueError: if the number of depth levels in the input differs from
            the number of entries in ``thickness``.
        AssertionError: if time units, shapes or values are inconsistent
            between files.
    """
    grid_paths = sorted(glob.glob(os.path.join(dir_path, '*grid_T.nc')))
    if not grid_paths:
        # Fail early with a clear message instead of crashing later on `None.dtype`
        # after the output file has already been created.
        raise FileNotFoundError('No *grid_T.nc files found in %s' % dir_path)

    time_units, z = None, None
    data = {}
    for path in grid_paths:
        print(path)
        with netCDF4.Dataset(path) as nc:
            nctime = nc[time_name]
            if time_units is None:
                # The first file defines the reference time units and depth coordinate
                time_units = nctime.units
                z = nc[depth_name][:]
            assert time_units == nctime.units, 'Time units mismatch: %s vs %s' % (time_units, nctime.units)
            numtime = nctime[:]
            data.setdefault('votemper', []).append(nc['votemper'][:, :, 0, 0])
            data.setdefault('numtime', []).append(numtime)

        # Only rewrite the file name: a directory name containing "_grid_"
        # must not be altered when locating the matching ptrc file.
        folder, filename = os.path.split(path)
        ptrc_path = os.path.join(folder, filename.replace('_grid_', '_ptrc_'))
        with netCDF4.Dataset(ptrc_path) as nc:
            nctime = nc[time_name]
            assert time_units == nctime.units, 'Time units mismatch between grid and ptrc: %s vs %s' % (time_units, nctime.units)
            assert numtime.shape == nctime.shape, 'Time shape mismatch between grid and ptrc: %s vs %s' % (numtime.shape, nctime.shape)
            assert (numtime == nctime[:]).all(), 'Time value mismatch between grid and ptrc: %s vs %s' % (numtime, nctime[:])
            for name in ('PHN', 'PHD', 'ZMI', 'ZME'):
                data.setdefault(name, []).append(nc[name][:, :, 0, 0])
        if last_year is not None and path.endswith('%04i_grid_T.nc' % last_year):
            break

    # The depth coordinate and the thickness array share one dimension in the
    # output, so verify they agree before creating the output file.
    if z.size != thickness.size:
        raise ValueError('Number of depth levels in input (%i) does not match number of cell thicknesses (%i)' % (z.size, thickness.size))

    with netCDF4.Dataset(path_out, 'w') as nc:
        nc.createDimension(time_name)  # no size given: unlimited, grows as slabs are written
        nc.createDimension(depth_name, thickness.size)
        ncvar = nc.createVariable(depth_name, z.dtype, (depth_name,))
        ncvar.units = 'm'
        ncvar[:] = z
        ncvar = nc.createVariable('h', thickness.dtype, (depth_name,))
        ncvar.long_name = 'cell_thickness'
        ncvar.units = 'm'
        ncvar[:] = thickness

        for name, slabs in data.items():
            if name == 'numtime':
                # The collected time values become the time coordinate variable
                ncvar = nc.createVariable(time_name, slabs[0].dtype, (time_name,))
                ncvar.units = time_units
            else:
                ncvar = nc.createVariable(name, slabs[0].dtype, (time_name, depth_name))
            # Append the per-file slabs one after another along the time axis
            i = 0
            for slab in slabs:
                ncvar[i:i + slab.shape[0], ...] = slab
                i += slab.shape[0]

In [None]:
# Step 2: compute weighted depth averages
def depth_average(path_in, path_out):
    """Reduce every profile in ``path_in`` to one weighted depth average per time step.

    The weight of each layer is the summed ``PHN + PHD + ZMI + ZME`` value of
    that layer multiplied by the layer thickness ``h``. The output file
    receives a copy of the time coordinate, the depth-summed weights
    (``w_int``), the depth sum of squared total times thickness (``w2_int``)
    and the weighted average of ``votemper``, ``PHN``, ``PHD``, ``ZMI`` and
    ``ZME``.

    Args:
        path_in: NetCDF file whose variables are indexed as (time, depth)
            (assumed to be a file produced by ``combine`` - TODO confirm).
        path_out: path of the NetCDF file to write (opened in ``'w'`` mode).
    """
    with netCDF4.Dataset(path_in) as src, netCDF4.Dataset(path_out, 'w') as dst:
        # Accumulate the total in the fixed order PHN, PHD, ZMI, ZME
        total = src['PHN'][:, :]
        for tracer in ('PHD', 'ZMI', 'ZME'):
            total = total + src[tracer][:, :]

        layer_height = src['h'][:]
        weights = total * layer_height
        weight_sum = weights.sum(axis=1)
        weight_sq_sum = (total ** 2 * layer_height).sum(axis=1)

        # Copy the time coordinate, including its units, to the output
        time_in = src[time_name]
        dst.createDimension(time_name)
        time_out = dst.createVariable(time_name, time_in.dtype, (time_name,))
        time_out.units = time_in.units
        time_out[:] = time_in[:]

        # Diagnostics describing the weights themselves
        for diag_name, diag in (('w_int', weight_sum), ('w2_int', weight_sq_sum)):
            dst.createVariable(diag_name, diag.dtype, (time_name,))[:] = diag

        # Weighted average over depth (axis 1) for each variable of interest
        for name in ('votemper', 'PHN', 'PHD', 'ZMI', 'ZME'):
            source_var = src[name]
            weighted_mean = (weights * source_var[:, :]).sum(axis=1) / weight_sum
            dst.createVariable(name, source_var.dtype, (time_name,))[:] = weighted_mean

In [None]:
# Every sub-directory of the projections folder is combined into "<dir>.nc"
# (stopping after year 2046) and then depth-averaged into "<dir>_da.nc".
for path in filter(os.path.isdir, glob.glob('../datasets/NEMO-MEDUSA-projections/*')):
    combined_path = path + '.nc'
    combine(path, combined_path, last_year=2046)
    depth_average(combined_path, path + '_da.nc')