In [1]:
import netCDF4 as nc
import xarray as xr
import cProfile
from subprocess import call
import numpy as np
import os

In [2]:
data_folder = "/projects/NS4659K/chuncheng/cases_fram/NBF1850_f19_tn11_test_mis3b_fwf3b_fram"

## Read data - netCDF4 method doesn't work any longer

In [None]:
def S1_to_string(s1_array):
    if s1_array.ndim==2:
        return[''.join(list(map(lambda x: x.decode('utf-8'), s1_array[i]))).strip() for i in range(len(s1_array))]
    else:
        return [''.join(list(map(lambda x: x.decode('utf-8'), s1_array))).strip()]

In [None]:
def open_mfdataset(files, variables, saving_name=False):
    
    def mfdataset_to_xarray(mfds, variables):

        ds = xr.Dataset()

        for variable in variables:
            ds = ds.assign({variable:(mfds.variables[variable].dimensions, mfds.variables[variable][:])})
            ds[variable].attrs = {attribute:mfds.variables[variable].__dict__[attribute] for attribute in mfds.variables[variable].ncattrs()}

        for dimension in ds.dims:
            if dimension in mfds.variables:
                dimension_array = mfds.variables[dimension][:]
                if dimension_array.dtype == np.dtype("S1"):
                    ds = ds.assign_coords({dimension:S1_to_string(dimension_array.data)})
                else:
                    ds = ds.assign_coords({dimension:dimension_array[:]})
            else:
                ds = ds.assign_coords({dimension:np.arange(len(mfds.dimensions[dimension]))})
        
        return ds
    
    mfds = nc.MFDataset(files)
    final_ds = mfdataset_to_xarray(mfds, variables)
    mfds.close()

    if saving_name:
        final_ds.to_netcdf(f"/nird/home/yro/temp/{saving_name}")
    
    return final_ds

In [None]:
start_ts, end_ts = 1701, 1800

In [None]:
# ATMOSPHRERE FILES

names = [[f"{data_folder}/atm/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.cam2.h0.{year}-{month:02d}.nc" for month in range(1,13)] for year in range(start_ts, end_ts)]
files = list(itertools.chain.from_iterable(names))

# T2m

sat = open_mfdataset(files, ['TS'])
sat

# Sea Ice

icefrac = open_mfdataset(files, ['ICEFRAC'], saving_name="/nird/home/yro/temp/mis3.ice.nc")
# icefrac = open_mfdataset(files, ['ICEFRAC'])
icefrac

In [None]:
## YEARLY OCEAN FILES

files = [f"{data_folder}/ocn/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.micom.hy.{year}.nc" for year in range(start_ts, end_ts)]

# Ocean temperature

ocnT = open_mfdataset(files, ['temp'])
ocnT

# AMOC

amoc = open_mfdataset(files, ['mmflxd'])
amoc

In [None]:
## MONTHLY OCEAN FILES
names = [[f"{data_folder}/ocn/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.micom.hm.{year}-{month:02d}.nc" for month in range(1,13)] for year in range(start_ts, end_ts)]
files = list(itertools.chain.from_iterable(names))

# MLD

mld = open_mfdataset(files, ['maxmld'])
mld

In [8]:
xr.open_dataset(f"{data_folder}/atm/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.cam2.h0.2000-01.nc")

In [7]:
xr.open_dataset(f"{data_folder}/ocn/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.micom.hy.2000.nc")

In [6]:
xr.open_dataset(f"{data_folder}/ocn/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.micom.hm.2000-01.nc")

## Read time series - benchmark

In [3]:
# Concat netCDF4 Dataset

def method_1(files):
    to_concat = []
    for file in files:
        to_concat.append(xr.open_dataset(xr.backends.NetCDF4DataStore(nc.Dataset(file))).temp)
    return xr.concat(to_concat, dim='time')

In [4]:
# Xarray open_mfdataset

def method_2(files):
    return xr.open_mfdataset(files).temp

In [5]:
def method_2bis(files):
    ds = xr.open_mfdataset(files, parallel=True).temp
    return ds.load()

In [6]:
def method_2ter(files):
    return xr.open_mfdataset(files, parallel=True, chunks={'time': 10}).temp.load()

In [7]:
def method_2four(files):
    return xr.open_mfdataset(files).temp.chunk({'time': 10}).load()

In [15]:
def mfdataset_to_xarray(mfds, variables):

    ds = xr.Dataset()

    for variable in variables:
        ds = ds.assign({variable:(mfds.variables[variable].dimensions, mfds.variables[variable][:])})
        ds[variable].attrs = {attribute:mfds.variables[variable].__dict__[attribute] for attribute in mfds.variables[variable].ncattrs()}

    for dimension in ds.dims:
        if dimension in mfds.variables:
            ds = ds.assign_coords({dimension:mfds.variables[dimension][:]})
        else:
            ds = ds.assign_coords({dimension:np.arange(len(mfds.dimensions[dimension]))})

In [16]:
def method_3(files, variables):
    return mfdataset_to_xarray(nc.MFDataset(files), variables)


In [10]:
def cat_in_chunks(list_files, cat_file, var="", dimensions="", N_cat_max=1000):
    """
    Append file in list_files to the end of cat_file
    in chunks of N_cat_max files at a time
    """
    list_files2cat = list_files[:]
    list_temp2cat = []  # temporary files to be concatenated at the end
    i = 0
    # Loop until there aren't any files left in the list
    while list_files2cat:
        # We take N files in chunks; either the max number (N_cat_max)
        # or the last chunk of the list
        N = min(len(list_files2cat), N_cat_max)
        chunk_files2cat = list_files2cat[0:N]
        # Concat the chunk of files together and append the output file to the list
        temp_output = '/nird/home/yro/temp/temp.{0}.nc'.format(str(i))
        if var:
            cmd = 'ncrcat -O --no_tmp_fl {d} -v {v} {fin} {fout}'.format(
                d=dimensions, v=var, fin=' '.join(chunk_files2cat),
                fout=temp_output
            )
            call(cmd, shell=True)
        else:
            cmd = 'ncrcat -O --no_tmp_fl {d} {fin} {fout}'.format(
                d=dimensions, v=var, fin=' '.join(chunk_files2cat),
                fout=temp_output
            )
            call(cmd, shell=True)
        list_temp2cat.append(temp_output)

        # delete first N elements from this list now that we've used them
        del list_files2cat[0:N]
        i += 1
    # append all the temporary files to the end of cat_file
    cmd = f"ncrcat -O --no_tmp_fl {' '.join(list_temp2cat)} {'/nird/home/yro/temp/temp.all.nc'}"
    call(cmd, shell=True)
    os.rename('/nird/home/yro/temp/temp.all.nc', cat_file)

    # Tidy up temporary files
    for f in list_temp2cat:
        os.remove(f)


In [11]:
def method_4(files, years):
    cat_in_chunks(files, f"/nird/home/yro/temp/temp{years}.ocnt", var="temp")
    xr.open_dataset(f"/nird/home/yro/temp/temp{years}.ocnt")

### Test for 10 years

In [11]:
start_ts, end_ts = 1701, 1710
files = [f"{data_folder}/ocn/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.micom.hy.{year}.nc" for year in range(start_ts, end_ts)]

In [12]:
cProfile.run("method_1(files)", sort='cumtime')

         568362 function calls (556472 primitive calls) in 3.286 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    3.286    3.286 {built-in method builtins.exec}
        1    0.030    0.030    3.286    3.286 <string>:1(<module>)
        1    0.368    0.368    3.255    3.255 <ipython-input-3-86b087e63657>:3(method_1)
        1    0.000    0.000    2.106    2.106 concat.py:11(concat)
        1    0.007    0.007    2.106    2.106 concat.py:429(_dataarray_concat)
        1    0.000    0.000    2.099    2.099 concat.py:309(_dataset_concat)
2163/1339    0.143    0.000    2.092    0.002 {built-in method numpy.array}
  962/249    0.000    0.000    2.089    0.008 _asarray.py:14(asarray)
        2    0.000    0.000    2.089    1.044 variable.py:2524(concat)
        1    0.000    0.000    2.088    2.088 variable.py:1622(concat)
   144/81    0.000    0.000    2.087    0.026 indexing.py:555(__array__)
    

     1017    0.000    0.000    0.000    0.000 fromnumeric.py:1701(_ravel_dispatcher)
       18    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(argmin)
       74    0.000    0.000    0.000    0.000 common.py:806(is_unsigned_integer_dtype)
       36    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(copyto)
       54    0.000    0.000    0.000    0.000 {method 'format' of 'str' objects}
      189    0.000    0.000    0.000    0.000 indexing.py:541(<genexpr>)
      148    0.000    0.000    0.000    0.000 common.py:905(is_datetime64_any_dtype)
      451    0.000    0.000    0.000    0.000 merge.py:81(unique_variable)
       64    0.000    0.000    0.000    0.000 common.py:566(is_string_dtype)
       13    0.000    0.000    0.000    0.000 typing.py:712(__instancecheck__)
        1    0.000    0.000    0.000    0.000 concat.py:29(get_dtype_kinds)
       36    0.000    0.000    0.000    0.000 fromnumeric.py:52(_wrapfunc)
      216    0.000    0.000    0.

In [13]:
cProfile.run("method_2(files)", sort='cumtime')

         1473298 function calls (1444626 primitive calls) in 1.502 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    1.511    1.511 {built-in method builtins.exec}
        1    0.000    0.000    1.511    1.511 <string>:1(<module>)
        1    0.003    0.003    1.497    1.497 <ipython-input-4-aae7034360ec>:3(method_2)
        1    0.000    0.000    1.492    1.492 api.py:724(open_mfdataset)
        1    0.000    0.000    1.162    1.162 api.py:918(<listcomp>)
        9    0.000    0.000    1.162    0.129 api.py:288(open_dataset)
        9    0.000    0.000    0.840    0.093 api.py:450(maybe_decode_store)
        9    0.001    0.000    0.523    0.058 conventions.py:517(decode_cf)
      621    0.002    0.000    0.337    0.001 netCDF4_.py:360(_acquire)
1719/1098    0.002    0.000    0.331    0.000 contextlib.py:107(__enter__)
3521/2279    0.002    0.000    0.330    0.000 {built-in method builtins.n

        9    0.000    0.000    0.004    0.000 variable.py:1359(set_dims)
       47    0.000    0.000    0.004    0.000 combine.py:69(<genexpr>)
      281    0.001    0.000    0.004    0.000 fromnumeric.py:70(_wrapreduction)
     1246    0.002    0.000    0.004    0.000 dataset.py:567(variables)
2296/1756    0.002    0.000    0.004    0.000 utils.py:502(__call__)
     3602    0.003    0.000    0.004    0.000 enum.py:284(__call__)
     1719    0.002    0.000    0.004    0.000 contextlib.py:116(__exit__)
      173    0.000    0.000    0.004    0.000 <__array_function__ internals>:2(prod)
      161    0.000    0.000    0.004    0.000 cftimeindex.py:251(__new__)
     8926    0.003    0.000    0.004    0.000 core.py:2475(<genexpr>)
     1072    0.001    0.000    0.004    0.000 _asarray.py:86(asanyarray)
       45    0.000    0.000    0.004    0.000 alignment.py:297(<genexpr>)
      132    0.000    0.000    0.003    0.000 variable.py:2411(to_index)
      509    0.002    0.000    0.003    0.00

      533    0.000    0.000    0.000    0.000 common.py:180(<lambda>)
      452    0.000    0.000    0.000    0.000 utils.py:430(__contains__)
        9    0.000    0.000    0.000    0.000 {built-in method posix.stat}
       36    0.000    0.000    0.000    0.000 {method 'any' of 'numpy.generic' objects}
      572    0.000    0.000    0.000    0.000 {method 'encode' of 'str' objects}
      509    0.000    0.000    0.000    0.000 core.py:1331(name)
        1    0.000    0.000    0.000    0.000 generic.py:5384(astype)
       68    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(copyto)
        9    0.000    0.000    0.000    0.000 highlevelgraph.py:87(_from_collection)
        1    0.000    0.000    0.000    0.000 sre_compile.py:759(compile)
       54    0.000    0.000    0.000    0.000 construction.py:338(extract_array)
        1    0.000    0.000    0.000    0.000 concat.py:110(concat_compat)
       76    0.000    0.000    0.000    0.000 common.py:750(is_signed_intege

        2    0.000    0.000    0.000    0.000 series.py:398(_set_axis)
        2    0.000    0.000    0.000    0.000 construction.py:575(is_empty_data)
        1    0.000    0.000    0.000    0.000 combine.py:89(<listcomp>)
        9    0.000    0.000    0.000    0.000 core.py:459(quote)
      120    0.000    0.000    0.000    0.000 {pandas._libs.algos.ensure_object}
       25    0.000    0.000    0.000    0.000 sre_parse.py:254(get)
        9    0.000    0.000    0.000    0.000 lru_cache.py:57(_enforce_size_limit)
       72    0.000    0.000    0.000    0.000 fromnumeric.py:2245(_any_dispatcher)
       18    0.000    0.000    0.000    0.000 {method 'rfind' of 'str' objects}
       90    0.000    0.000    0.000    0.000 indexing.py:1387(dtype)
        3    0.000    0.000    0.000    0.000 generic.py:195(__init__)
        3    0.000    0.000    0.000    0.000 blocks.py:237(mgr_locs)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(putmask)
       90    0.00

In [14]:
cProfile.run("method_2bis(files)", sort='cumtime')

         598963 function calls (593923 primitive calls) in 3.881 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      3/1    0.000    0.000    3.884    3.884 {built-in method builtins.exec}
        1    0.030    0.030    3.884    3.884 <string>:1(<module>)
        1    0.004    0.004    3.854    3.854 <ipython-input-5-dfab172b92aa>:1(method_2bis)
        2    0.000    0.000    3.454    1.727 base.py:389(compute)
        2    0.000    0.000    3.237    1.619 threaded.py:33(get)
        2    0.000    0.000    3.231    1.615 local.py:347(get_async)
       55    0.000    0.000    3.228    0.059 threading.py:264(wait)
      256    3.227    0.013    3.227    0.013 {method 'acquire' of '_thread.lock' objects}
       36    0.000    0.000    3.227    0.090 local.py:132(queue_get)
       36    0.000    0.000    3.227    0.090 queue.py:153(get)
        1    0.000    0.000    2.728    2.728 api.py:724(open_mfdataset)
        1    0.

       26    0.000    0.000    0.001    0.000 uuid.py:759(uuid4)
       50    0.000    0.000    0.001    0.000 core.py:3502(<listcomp>)
      205    0.001    0.000    0.001    0.000 core.py:159(get_dependencies)
       90    0.000    0.000    0.001    0.000 variable.py:454(values)
       51    0.000    0.000    0.001    0.000 algorithms.py:178(_reconstruct_data)
       51    0.000    0.000    0.001    0.000 algorithms.py:272(_check_object_for_strings)
     1530    0.001    0.000    0.001    0.000 dataset.py:383(<genexpr>)
       18    0.000    0.000    0.001    0.000 combine.py:503(vars_as_keys)
     2446    0.001    0.000    0.001    0.000 core.py:2475(<genexpr>)
       90    0.000    0.000    0.001    0.000 variable.py:246(_as_array_or_item)
      225    0.000    0.000    0.001    0.000 common.py:1541(_is_dtype)
    72/27    0.000    0.000    0.001    0.000 delayed.py:23(unzip)
      2/1    0.000    0.000    0.001    0.001 <frozen importlib._bootstrap>:211(_call_with_frames_removed)


       26    0.000    0.000    0.000    0.000 uuid.py:121(__init__)
       77    0.000    0.000    0.000    0.000 common.py:171(_get_axis_num)
     1060    0.000    0.000    0.000    0.000 copy.py:111(_copy_immutable)
       67    0.000    0.000    0.000    0.000 core.py:121(<genexpr>)
        1    0.000    0.000    0.000    0.000 optimization.py:288(inline_functions)
        8    0.000    0.000    0.000    0.000 core.py:263(reverse_dict)
       33    0.000    0.000    0.000    0.000 missing.py:456(_array_equivalent_object)
     1000    0.000    0.000    0.000    0.000 multiarray.py:634(result_type)
       10    0.000    0.000    0.000    0.000 combine.py:74(<genexpr>)
       45    0.000    0.000    0.000    0.000 duck_array_ops.py:180(lazy_array_equiv)
        2    0.000    0.000    0.000    0.000 managers.py:1531(from_array)
       60    0.000    0.000    0.000    0.000 {method 'format' of 'str' objects}
        1    0.000    0.000    0.000    0.000 sre_parse.py:913(parse)
        2 

       10    0.000    0.000    0.000    0.000 dataset.py:598(encoding)
       19    0.000    0.000    0.000    0.000 base.py:201(optimization_function)
       63    0.000    0.000    0.000    0.000 local.py:453(<genexpr>)
      135    0.000    0.000    0.000    0.000 indexes.py:44(__init__)
        2    0.000    0.000    0.000    0.000 local.py:174(<dictcomp>)
       20    0.000    0.000    0.000    0.000 threading.py:728(_newname)
       36    0.000    0.000    0.000    0.000 _collections_abc.py:271(__subclasshook__)
       55    0.000    0.000    0.000    0.000 {method 'release' of '_thread.lock' objects}
        5    0.000    0.000    0.000    0.000 {built-in method builtins.min}
        1    0.000    0.000    0.000    0.000 combine.py:13(_infer_concat_order_from_positions)
        1    0.000    0.000    0.000    0.000 combine.py:89(<listcomp>)
        1    0.000    0.000    0.000    0.000 utils.py:278(is_scalar)
       90    0.000    0.000    0.000    0.000 utils.py:449(__init__)
 

        1    0.000    0.000    0.000    0.000 dataset.py:3726(<setcomp>)
        1    0.000    0.000    0.000    0.000 variable.py:2334(<listcomp>)
        1    0.000    0.000    0.000    0.000 utils.py:178(peek_at)
        1    0.000    0.000    0.000    0.000 blockwise.py:577(<setcomp>)
        9    0.000    0.000    0.000    0.000 core.py:1083(<listcomp>)
        6    0.000    0.000    0.000    0.000 highlevelgraph.py:82(dicts)
        4    0.000    0.000    0.000    0.000 base.py:247(<genexpr>)
        2    0.000    0.000    0.000    0.000 base.py:1056(<listcomp>)
        9    0.000    0.000    0.000    0.000 optimization.py:391(_enforce_max_key_limit)
       18    0.000    0.000    0.000    0.000 order.py:687(__init__)
       18    0.000    0.000    0.000    0.000 delayed.py:695(single_key)
        3    0.000    0.000    0.000    0.000 blocks.py:135(_check_ndim)
        1    0.000    0.000    0.000    0.000 generic.py:377(_get_axis)
        1    0.000    0.000    0.000    0.000 _i

In [15]:
cProfile.run("method_2ter(files)", sort='cumtime')

         597343 function calls (592307 primitive calls) in 3.879 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    3.882    3.882 {built-in method builtins.exec}
        1    0.030    0.030    3.882    3.882 <string>:1(<module>)
        1    0.003    0.003    3.852    3.852 <ipython-input-6-fa818f8f674c>:1(method_2ter)
        2    0.000    0.000    3.493    1.746 base.py:389(compute)
        2    0.000    0.000    3.273    1.637 threaded.py:33(get)
        2    0.000    0.000    3.273    1.637 local.py:347(get_async)
       36    0.000    0.000    3.269    0.091 local.py:132(queue_get)
       36    0.000    0.000    3.269    0.091 queue.py:153(get)
       34    0.000    0.000    3.268    0.096 threading.py:264(wait)
      172    3.268    0.019    3.268    0.019 {method 'acquire' of '_thread.lock' objects}
        1    0.000    0.000    2.782    2.782 api.py:724(open_mfdataset)
        1    0.

        2    0.000    0.000    0.000    0.000 merge.py:331(determine_coords)
        2    0.000    0.000    0.000    0.000 base.py:303(<listcomp>)
       54    0.000    0.000    0.000    0.000 construction.py:338(extract_array)
       41    0.000    0.000    0.000    0.000 re.py:271(_compile)
       50    0.000    0.000    0.000    0.000 core.py:3566(<listcomp>)
       51    0.000    0.000    0.000    0.000 inference.py:185(is_array_like)
       57    0.000    0.000    0.000    0.000 common.py:530(is_categorical_dtype)
    90/27    0.000    0.000    0.000    0.000 core.py:86(_execute_task)
      135    0.000    0.000    0.000    0.000 dataset.py:1371(indexes)
        2    0.000    0.000    0.000    0.000 core.py:394(toposort)
        6    0.000    0.000    0.000    0.000 combine.py:60(<listcomp>)
      591    0.000    0.000    0.000    0.000 blockwise.py:855(<listcomp>)
        2    0.000    0.000    0.000    0.000 core.py:325(_toposort)
       10    0.000    0.000    0.000    0.000 ba

        1    0.000    0.000    0.000    0.000 core.py:4150(chunks_from_arrays)
        9    0.000    0.000    0.000    0.000 optimization.py:323(inlinable)
     19/1    0.000    0.000    0.000    0.000 core.py:1085(<listcomp>)
       36    0.000    0.000    0.000    0.000 queue.py:216(_get)
      213    0.000    0.000    0.000    0.000 {method 'pop' of 'list' objects}
        5    0.000    0.000    0.000    0.000 common.py:218(asarray_tuplesafe)
       36    0.000    0.000    0.000    0.000 common.py:167(<genexpr>)
       61    0.000    0.000    0.000    0.000 common.py:348(is_datetime64_dtype)
       74    0.000    0.000    0.000    0.000 utils.py:238(is_dict_like)
       28    0.000    0.000    0.000    0.000 base.py:719(normalize_object)
       77    0.000    0.000    0.000    0.000 {method 'index' of 'tuple' objects}
        2    0.000    0.000    0.000    0.000 base.py:1007(get_scheduler)
        2    0.000    0.000    0.000    0.000 merge.py:369(coerce_pandas_values)
       20   

        4    0.000    0.000    0.000    0.000 merge.py:154(_assert_compat_valid)
        7    0.000    0.000    0.000    0.000 indexes.py:54(__iter__)
       27    0.000    0.000    0.000    0.000 core.py:4096(<genexpr>)
       54    0.000    0.000    0.000    0.000 order.py:660(<genexpr>)
        9    0.000    0.000    0.000    0.000 delayed.py:486(__dask_layers__)
       34    0.000    0.000    0.000    0.000 {method 'release' of '_thread.lock' objects}
       26    0.000    0.000    0.000    0.000 {method 'count' of 'list' objects}
        7    0.000    0.000    0.000    0.000 utils.py:297(is_valid_numpy_dtype)
        1    0.000    0.000    0.000    0.000 optimization.py:83(<setcomp>)
       27    0.000    0.000    0.000    0.000 core.py:4075(<genexpr>)
        9    0.000    0.000    0.000    0.000 core.py:4388(<lambda>)
        1    0.000    0.000    0.000    0.000 concat.py:139(<listcomp>)
       10    0.000    0.000    0.000    0.000 base.py:3890(_get_engine_target)
        6   

In [16]:
cProfile.run("method_2four(files)", sort='cumtime')

         1478115 function calls (1449423 primitive calls) in 2.681 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    2.689    2.689 {built-in method builtins.exec}
        1    0.030    0.030    2.689    2.689 <string>:1(<module>)
        1    0.003    0.003    2.658    2.658 <ipython-input-7-ac623c7e9dc6>:1(method_2four)
        1    0.000    0.000    1.568    1.568 api.py:724(open_mfdataset)
        1    0.000    0.000    1.238    1.238 api.py:918(<listcomp>)
        9    0.000    0.000    1.238    0.138 api.py:288(open_dataset)
        1    0.000    0.000    1.083    1.083 dataarray.py:796(load)
        1    0.000    0.000    1.083    1.083 dataset.py:629(load)
        1    0.000    0.000    1.083    1.083 base.py:389(compute)
        1    0.000    0.000    1.082    1.082 threaded.py:33(get)
        1    0.000    0.000    1.082    1.082 local.py:347(get_async)
       19    0.000    0.000   

       40    0.000    0.000    0.002    0.000 variable.py:384(compute)
        1    0.000    0.000    0.002    0.002 dataset.py:1200(_item_sources)
        1    0.000    0.000    0.002    0.002 dataset.py:1207(<dictcomp>)
     4322    0.002    0.000    0.002    0.000 core.py:975(<genexpr>)
     7519    0.002    0.000    0.002    0.000 {built-in method math.isnan}
       72    0.001    0.000    0.002    0.000 indexing.py:948(_decompose_outer_indexer)
        1    0.000    0.000    0.002    0.002 dataarray.py:957(chunk)
     3006    0.001    0.000    0.002    0.000 indexing.py:690(as_indexable)
     6494    0.002    0.000    0.002    0.000 core.py:2481(<genexpr>)
      591    0.001    0.000    0.002    0.000 core.py:3092(common_blockdim)
     7057    0.002    0.000    0.002    0.000 {built-in method builtins.issubclass}
      900    0.001    0.000    0.002    0.000 core.py:1776(astype)
      552    0.001    0.000    0.002    0.000 common.py:1460(is_extension_array_dtype)
     8936    0.0

       19    0.000    0.000    0.000    0.000 threading.py:499(__init__)
      922    0.000    0.000    0.000    0.000 core.py:1066(__dask_graph__)
        1    0.000    0.000    0.000    0.000 concat.py:29(get_dtype_kinds)
      900    0.000    0.000    0.000    0.000 inspect.py:2835(parameters)
        9    0.000    0.000    0.000    0.000 posixpath.py:376(abspath)
        1    0.000    0.000    0.000    0.000 dataset.py:394(__getitem__)
        9    0.000    0.000    0.000    0.000 indexing.py:278(slice_slice)
        3    0.000    0.000    0.000    0.000 blocks.py:2696(make_block)
        1    0.000    0.000    0.000    0.000 blocks.py:514(astype)
       18    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(argmin)
      171    0.000    0.000    0.000    0.000 common.py:149(cast_scalar_indexer)
       58    0.000    0.000    0.000    0.000 dataset.py:1386(data_vars)
      203    0.000    0.000    0.000    0.000 _collections_abc.py:719(__iter__)
        2    0.000 

        1    0.000    0.000    0.000    0.000 rechunk.py:227(<dictcomp>)
       45    0.000    0.000    0.000    0.000 rechunk.py:571(<genexpr>)
        1    0.000    0.000    0.000    0.000 optimization.py:111(optimize_slices)
        2    0.000    0.000    0.000    0.000 core.py:1072(__dask_keys__)
       24    0.000    0.000    0.000    0.000 range.py:316(dtype)
        1    0.000    0.000    0.000    0.000 function_base.py:1152(diff)
       58    0.000    0.000    0.000    0.000 dataset.py:378(__init__)
      135    0.000    0.000    0.000    0.000 indexes.py:44(__init__)
      6/1    0.000    0.000    0.000    0.000 local.py:285(nested_get)
       46    0.000    0.000    0.000    0.000 local.py:453(<genexpr>)
        1    0.000    0.000    0.000    0.000 blocks.py:2368(__init__)
       63    0.000    0.000    0.000    0.000 multiarray.py:311(where)
       69    0.000    0.000    0.000    0.000 multiarray.py:1043(copyto)
        9    0.000    0.000    0.000    0.000 api.py:92(_get_

In [17]:
cProfile.run("method_3(files)", sort='cumtime')

  


         8282 function calls (8237 primitive calls) in 4.229 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    4.229    4.229 {built-in method builtins.exec}
        1    0.078    0.078    4.229    4.229 <string>:1(<module>)
        1    1.912    1.912    4.150    4.150 <ipython-input-8-82b5311a71a3>:1(method_3)
       18    0.814    0.045    1.109    0.062 core.py:1011(__call__)
       40    0.931    0.023    0.931    0.023 {built-in method numpy.core._multiarray_umath.implement_array_function}
        2    0.000    0.000    0.652    0.326 core.py:6927(concatenate)
        9    0.000    0.000    0.650    0.072 core.py:4131(__add__)
        3    0.000    0.000    0.643    0.214 <__array_function__ internals>:2(concatenate)
        9    0.000    0.000    0.459    0.051 core.py:4165(__mul__)
        2    0.292    0.146    0.334    0.167 core.py:3340(__setitem__)
       18    0.000    0.000    0.

        1    0.000    0.000    0.000    0.000 {method 'startswith' of 'str' objects}
        1    0.000    0.000    0.000    0.000 {method 'endswith' of 'str' objects}
        1    0.000    0.000    0.000    0.000 {method 'values' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'decode' of 'bytes' objects}
        1    0.000    0.000    0.000    0.000 {method 'startswith' of 'bytes' objects}
        8    0.000    0.000    0.000    0.000 dataarray.py:142(<genexpr>)
        1    0.000    0.000    0.000    0.000 core.py:221(_get_dtype_of)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 codecs.py:260(__init__)
        2    0.000    0.000    0.000    0.000 {method 'append' of 'collections.deque' objects}
        1    0.000    0.000    0.000    0.000 variable.py:2583(<listcomp>)




In [18]:
cProfile.run("method_4(files,10)", sort='cumtime')

/nird/home/yro/temp/temp.0.nc
         4892 function calls (4715 primitive calls) in 8.985 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    8.985    8.985 {built-in method builtins.exec}
        1    0.000    0.000    8.985    8.985 <string>:1(<module>)
        1    0.000    0.000    8.985    8.985 <ipython-input-10-1e025563b5fe>:1(method_4)
        1    0.000    0.000    8.953    8.953 <ipython-input-9-c8e57ab4d66a>:1(cat_in_chunks)
        2    0.000    0.000    8.952    4.476 subprocess.py:331(call)
        4    0.000    0.000    8.836    2.209 subprocess.py:1014(wait)
        4    0.000    0.000    8.836    2.209 subprocess.py:1621(_wait)
        2    0.000    0.000    8.836    4.418 subprocess.py:1608(_try_wait)
        2    8.836    4.418    8.836    4.418 {built-in method posix.waitpid}
        2    0.000    0.000    0.116    0.058 subprocess.py:681(__init__)
        2    0.002    0.00

        1    0.000    0.000    0.000    0.000 dataset.py:567(variables)
       50    0.000    0.000    0.000    0.000 variable.py:492(dims)
        3    0.000    0.000    0.000    0.000 common.py:1296(is_float_dtype)
        1    0.000    0.000    0.000    0.000 file_manager.py:307(__init__)
        1    0.000    0.000    0.000    0.000 locks.py:117(acquire)
       10    0.000    0.000    0.000    0.000 inference.py:322(is_hashable)
        3    0.000    0.000    0.000    0.000 {method 'astype' of 'numpy.ndarray' objects}
        3    0.000    0.000    0.000    0.000 {method 'filters' of 'netCDF4._netCDF4.Variable' objects}
        3    0.000    0.000    0.000    0.000 threading.py:1050(_wait_for_tstate_lock)
        1    0.000    0.000    0.000    0.000 posixpath.py:64(isabs)
        4    0.000    0.000    0.000    0.000 {built-in method builtins.sum}
       22    0.000    0.000    0.000    0.000 {method 'rpartition' of 'str' objects}
       16    0.000    0.000    0.000    0.000 {bui

        1    0.000    0.000    0.000    0.000 fromnumeric.py:3003(_cumprod_dispatcher)
        1    0.000    0.000    0.000    0.000 fromnumeric.py:3071(_ndim_dispatcher)
        2    0.000    0.000    0.000    0.000 multiarray.py:143(concatenate)
        4    0.000    0.000    0.000    0.000 multiarray.py:1043(copyto)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        3    0.000    0.000    0.000    0.000 threading.py:507(is_set)




### Test for 100 years

In [19]:
start_ts, end_ts = 1701, 1800
files = [f"{data_folder}/ocn/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.micom.hy.{year}.nc" for year in range(start_ts, end_ts)]

In [20]:
cProfile.run("method_1(files)", sort='cumtime')

         6227350 function calls (6097392 primitive calls) in 36.680 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   36.680   36.680 {built-in method builtins.exec}
        1    0.376    0.376   36.680   36.680 <string>:1(<module>)
        1    6.332    6.332   36.304   36.304 <ipython-input-3-86b087e63657>:3(method_1)
        1    0.000    0.000   20.515   20.515 concat.py:11(concat)
        1    0.007    0.007   20.515   20.515 concat.py:429(_dataarray_concat)
        1    0.000    0.000   20.501   20.501 concat.py:309(_dataset_concat)
23673/14659    1.178    0.000   20.487    0.001 {built-in method numpy.array}
10502/2679    0.005    0.000   20.449    0.008 _asarray.py:14(asarray)
 1584/891    0.006    0.000   20.422    0.023 indexing.py:555(__array__)
        2    0.000    0.000   20.406   10.203 variable.py:2524(concat)
        1    0.000    0.000   20.402   20.402 variable.py:1622(concat

      990    0.002    0.000    0.003    0.000 indexing.py:1244(__init__)
     3762    0.003    0.000    0.003    0.000 {method 'split' of 'str' objects}
        1    0.000    0.000    0.003    0.003 base.py:4155(_concat)
       99    0.000    0.000    0.002    0.000 netCDF4_.py:412(get_encoding)
     5544    0.002    0.000    0.002    0.000 strings.py:42(__init__)
       99    0.001    0.000    0.002    0.000 utils.py:606(is_remote_uri)
     1782    0.001    0.000    0.002    0.000 utils.py:990(__enter__)
       99    0.001    0.000    0.002    0.000 indexing.py:278(slice_slice)
    11187    0.002    0.000    0.002    0.000 fromnumeric.py:1701(_ravel_dispatcher)
    11212    0.002    0.000    0.002    0.000 utils.py:418(__init__)
     1790    0.002    0.000    0.002    0.000 _collections_abc.py:676(items)
        1    0.000    0.000    0.002    0.002 concat.py:110(concat_compat)
      797    0.002    0.000    0.002    0.000 common.py:1733(pandas_dtype)
     5544    0.002    0.000    0.

In [21]:
cProfile.run("method_2(files)", sort='cumtime')

         15456158 function calls (15146123 primitive calls) in 20.102 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   20.191   20.191 {built-in method builtins.exec}
        1    0.002    0.002   20.191   20.191 <string>:1(<module>)
        1    0.048    0.048   19.811   19.811 <ipython-input-4-aae7034360ec>:3(method_2)
        1    0.000    0.000   19.761   19.761 api.py:724(open_mfdataset)
        1    0.001    0.001   16.349   16.349 api.py:918(<listcomp>)
       99    0.005    0.000   16.348    0.165 api.py:288(open_dataset)
       99    0.006    0.000   11.143    0.113 api.py:450(maybe_decode_store)
       99    0.007    0.000    7.159    0.072 conventions.py:517(decode_cf)
     6831    0.026    0.000    5.370    0.001 netCDF4_.py:360(_acquire)
18909/12078    0.022    0.000    5.306    0.000 contextlib.py:107(__enter__)
38711/25049    0.018    0.000    5.298    0.000 {built-in method bui

     5544    0.006    0.000    0.010    0.000 dataset.py:1772(selkeys)
      303    0.006    0.000    0.010    0.000 {pandas._libs.lib.infer_dtype}
    24397    0.007    0.000    0.010    0.000 core.py:1045(<genexpr>)
     4950    0.003    0.000    0.010    0.000 core.py:1109(npartitions)
    13860    0.010    0.000    0.010    0.000 file_manager.py:157(_optional_lock)
      198    0.000    0.000    0.010    0.000 <__array_function__ internals>:2(nanargmax)
      399    0.003    0.000    0.010    0.000 dataset.py:883(_replace)
     5854    0.001    0.000    0.009    0.000 alignment.py:422(<genexpr>)
       50    0.002    0.000    0.009    0.000 duck_array_ops.py:177(<listcomp>)
     1584    0.004    0.000    0.009    0.000 indexing.py:392(__init__)
      198    0.001    0.000    0.009    0.000 nanfunctions.py:507(nanargmax)
    19800    0.007    0.000    0.009    0.000 inspect.py:158(isfunction)
      393    0.000    0.000    0.009    0.000 common.py:598(is_excluded_dtype)
     2091   

        6    0.001    0.000    0.002    0.000 combine.py:60(<listcomp>)
      297    0.000    0.000    0.002    0.000 dataset.py:614(sizes)
      357    0.001    0.000    0.002    0.000 _dtype.py:321(_name_get)
    10098    0.002    0.000    0.002    0.000 core.py:1066(__dask_graph__)
      796    0.001    0.000    0.002    0.000 common.py:806(is_unsigned_integer_dtype)
      855    0.002    0.000    0.002    0.000 common.py:1733(pandas_dtype)
     2711    0.002    0.000    0.002    0.000 fromnumeric.py:71(<dictcomp>)
        1    0.000    0.000    0.002    0.002 common.py:225(__getattr__)
      149    0.000    0.000    0.002    0.000 common.py:151(get_axis_num)
        1    0.000    0.000    0.002    0.002 concat.py:110(concat_compat)
       99    0.000    0.000    0.002    0.000 posixpath.py:376(abspath)
        9    0.000    0.000    0.002    0.000 coordinates.py:211(__getitem__)
     5091    0.002    0.000    0.002    0.000 blockwise.py:855(<listcomp>)
    10001    0.002    0.000  

        1    0.000    0.000    0.000    0.000 _internal.py:565(__dtype_from_pep3118)
      150    0.000    0.000    0.000    0.000 multiarray.py:143(concatenate)
      198    0.000    0.000    0.000    0.000 variable.py:1390(<genexpr>)
      198    0.000    0.000    0.000    0.000 utils.py:594(close_on_error)
       99    0.000    0.000    0.000    0.000 base.py:669(size)
        6    0.000    0.000    0.000    0.000 range.py:389(_shallow_copy)
      392    0.000    0.000    0.000    0.000 numeric.py:71(_zeros_like_dispatcher)
        1    0.000    0.000    0.000    0.000 cast.py:892(astype_nansafe)
        3    0.000    0.000    0.000    0.000 generic.py:5132(__setattr__)
      198    0.000    0.000    0.000    0.000 nanfunctions.py:503(_nanargmax_dispatcher)
      102    0.000    0.000    0.000    0.000 merge.py:494(merge_attrs)
      396    0.000    0.000    0.000    0.000 fromnumeric.py:2333(_all_dispatcher)
        1    0.000    0.000    0.000    0.000 base.py:4159(<listcomp>)
   

In [22]:
cProfile.run("method_2bis(files)", sort='cumtime')

         5815668 function calls (5766076 primitive calls) in 43.759 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   43.792   43.792 {built-in method builtins.exec}
        1    0.361    0.361   43.792   43.792 <string>:1(<module>)
        1    0.057    0.057   43.431   43.431 <ipython-input-5-dfab172b92aa>:1(method_2bis)
        2    0.000    0.000   38.819   19.410 base.py:389(compute)
        2    0.001    0.000   35.547   17.774 threaded.py:33(get)
        2    0.004    0.002   35.546   17.773 local.py:347(get_async)
      396    0.001    0.000   35.490    0.090 local.py:132(queue_get)
      396    0.004    0.000   35.489    0.090 queue.py:153(get)
      396    0.003    0.000   35.481    0.090 threading.py:264(wait)
     2079   35.477    0.017   35.477    0.017 {method 'acquire' of '_thread.lock' objects}
        1    0.001    0.001   30.867   30.867 api.py:724(open_mfdataset)
        1   

        9    0.000    0.000    0.002    0.000 coordinates.py:211(__getitem__)
     9252    0.002    0.000    0.002    0.000 {built-in method builtins.issubclass}
      507    0.000    0.000    0.002    0.000 common.py:530(is_categorical_dtype)
     5091    0.001    0.000    0.002    0.000 core.py:3116(<listcomp>)
        2    0.000    0.000    0.002    0.001 base.py:303(<listcomp>)
  810/207    0.001    0.000    0.002    0.000 core.py:86(_execute_task)
        2    0.000    0.000    0.002    0.001 base.py:237(_extract_graph_and_keys)
       99    0.001    0.000    0.002    0.000 optimization.py:378(default_fused_keys_renamer)
      600    0.002    0.000    0.002    0.000 {method 'format' of 'str' objects}
       52    0.000    0.000    0.002    0.000 algorithms.py:69(_ensure_data)
     5049    0.001    0.000    0.002    0.000 core.py:3731(asarray)
     4950    0.001    0.000    0.002    0.000 core.py:3497(<lambda>)
        1    0.000    0.000    0.002    0.002 dataset.py:3698(drop_vars

        1    0.000    0.000    0.000    0.000 optimization.py:557(<setcomp>)
      102    0.000    0.000    0.000    0.000 dataset.py:578(attrs)
      297    0.000    0.000    0.000    0.000 core.py:4075(<genexpr>)
       24    0.000    0.000    0.000    0.000 base.py:1210(_set_names)
       99    0.000    0.000    0.000    0.000 _collections_abc.py:672(keys)
       63    0.000    0.000    0.000    0.000 common.py:1733(pandas_dtype)
       99    0.000    0.000    0.000    0.000 delayed.py:486(__dask_layers__)
       62    0.000    0.000    0.000    0.000 alignment.py:363(is_alignable)
        3    0.000    0.000    0.000    0.000 generic.py:5132(__setattr__)
      100    0.000    0.000    0.000    0.000 base.py:3890(_get_engine_target)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(lexsort)
      497    0.000    0.000    0.000    0.000 variable.py:2274(load)
        2    0.000    0.000    0.000    0.000 base.py:5718(_maybe_cast_data_without_dtype)
      

In [23]:
cProfile.run("method_2ter(files)", sort='cumtime')

         5819042 function calls (5768658 primitive calls) in 43.620 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   43.655   43.655 {built-in method builtins.exec}
        1    0.333    0.333   43.655   43.655 <string>:1(<module>)
        1    0.098    0.098   43.321   43.321 <ipython-input-6-fa818f8f674c>:1(method_2ter)
        2    0.000    0.000   39.121   19.560 base.py:389(compute)
        2    0.001    0.000   36.284   18.142 threaded.py:33(get)
        2    0.004    0.002   36.284   18.142 local.py:347(get_async)
      396    0.001    0.000   36.237    0.092 local.py:132(queue_get)
      396    0.004    0.000   36.236    0.092 queue.py:153(get)
      396    0.003    0.000   36.228    0.091 threading.py:264(wait)
     2079   36.224    0.017   36.224    0.017 {method 'acquire' of '_thread.lock' objects}
        1    0.001    0.001   30.700   30.700 api.py:724(open_mfdataset)
        1   

      600    0.002    0.000    0.002    0.000 {method 'format' of 'str' objects}
     4950    0.001    0.000    0.002    0.000 core.py:3769(asanyarray)
       99    0.001    0.000    0.002    0.000 optimization.py:378(default_fused_keys_renamer)
      495    0.001    0.000    0.002    0.000 duck_array_ops.py:180(lazy_array_equiv)
      214    0.000    0.000    0.002    0.000 base.py:1176(name)
     2802    0.001    0.000    0.001    0.000 common.py:1565(_get_dtype)
     4093    0.001    0.000    0.001    0.000 {method 'add' of 'set' objects}
      594    0.001    0.000    0.001    0.000 common.py:1296(is_float_dtype)
       51    0.000    0.000    0.001    0.000 algorithms.py:272(_check_object_for_strings)
1983/1882    0.001    0.000    0.001    0.000 common.py:256(__setattr__)
     1091    0.001    0.000    0.001    0.000 dataclasses.py:1036(is_dataclass)
        1    0.000    0.000    0.001    0.001 optimization.py:330(<listcomp>)
    10150    0.001    0.000    0.001    0.000 copy.py

       99    0.000    0.000    0.000    0.000 core.py:281(subs)
      102    0.000    0.000    0.000    0.000 dataset.py:578(attrs)
      396    0.000    0.000    0.000    0.000 {method '__exit__' of '_thread.lock' objects}
       99    0.000    0.000    0.000    0.000 delayed.py:486(__dask_layers__)
       99    0.000    0.000    0.000    0.000 _collections_abc.py:672(keys)
      396    0.000    0.000    0.000    0.000 variable.py:1398(<genexpr>)
        4    0.000    0.000    0.000    0.000 dataset.py:195(<setcomp>)
      693    0.000    0.000    0.000    0.000 delayed.py:508(key)
       99    0.000    0.000    0.000    0.000 optimization.py:344(unwrap_partial)
       69    0.000    0.000    0.000    0.000 utils.py:630(hashable)
        1    0.000    0.000    0.000    0.000 optimization.py:557(<setcomp>)
      149    0.000    0.000    0.000    0.000 core.py:1331(name)
       62    0.000    0.000    0.000    0.000 alignment.py:363(is_alignable)
      396    0.000    0.000    0.000    

In [24]:
cProfile.run("method_2four(files)", sort='cumtime')

         15497769 function calls (15187203 primitive calls) in 32.897 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   32.983   32.983 {built-in method builtins.exec}
        1    0.349    0.349   32.983   32.983 <string>:1(<module>)
        1    0.046    0.046   32.634   32.634 <ipython-input-7-ac623c7e9dc6>:1(method_2four)
        1    0.000    0.000   19.558   19.558 api.py:724(open_mfdataset)
        1    0.001    0.001   16.210   16.210 api.py:918(<listcomp>)
       99    0.005    0.000   16.209    0.164 api.py:288(open_dataset)
        1    0.004    0.004   13.024   13.024 dataarray.py:796(load)
        1    0.331    0.331   12.591   12.591 dataset.py:629(load)
        1    0.000    0.000   12.260   12.260 base.py:389(compute)
       99    0.006    0.000   10.921    0.110 api.py:450(maybe_decode_store)
        1    0.000    0.000    9.831    9.831 threaded.py:33(get)
        1    0.002  

     5544    0.008    0.000    0.010    0.000 netCDF4_.py:171(_ensure_fill_value_valid)
      303    0.006    0.000    0.010    0.000 {pandas._libs.lib.infer_dtype}
     5544    0.010    0.000    0.010    0.000 {method 'filters' of 'netCDF4._netCDF4.Variable' objects}
     4950    0.003    0.000    0.010    0.000 core.py:1109(npartitions)
     5797    0.010    0.000    0.010    0.000 {built-in method _hashlib.openssl_md5}
    17353    0.007    0.000    0.010    0.000 base.py:563(__len__)
    24402    0.007    0.000    0.010    0.000 core.py:1045(<genexpr>)
     5547    0.006    0.000    0.010    0.000 dataset.py:1772(selkeys)
      198    0.000    0.000    0.010    0.000 <__array_function__ internals>:2(nanargmax)
     1990    0.002    0.000    0.010    0.000 common.py:492(is_interval_dtype)
     1188    0.003    0.000    0.010    0.000 strings.py:222(dtype)
    13860    0.010    0.000    0.010    0.000 file_manager.py:157(_optional_lock)
     6835    0.005    0.000    0.010    0.000 c

        1    0.000    0.000    0.001    0.001 optimization.py:330(<listcomp>)
     1792    0.001    0.000    0.001    0.000 {method '__enter__' of '_thread.lock' objects}
     3940    0.001    0.000    0.001    0.000 {method 'add' of 'set' objects}
      792    0.001    0.000    0.001    0.000 indexing.py:760(<listcomp>)
      394    0.001    0.000    0.001    0.000 {built-in method numpy.zeros}
       99    0.000    0.000    0.001    0.000 indexing.py:48(_expand_slice)
      491    0.000    0.000    0.001    0.000 types.py:164(__get__)
      397    0.001    0.000    0.001    0.000 _collections_abc.py:672(keys)
       50    0.001    0.000    0.001    0.000 blockwise.py:844(<listcomp>)
        3    0.000    0.000    0.001    0.000 series.py:201(__init__)
        2    0.000    0.000    0.001    0.000 dataarray.py:421(_to_temp_dataset)
        2    0.000    0.000    0.001    0.000 dataarray.py:449(_to_dataset_whole)
      198    0.001    0.000    0.001    0.000 _internal.py:827(npy_ctypes

        9    0.000    0.000    0.000    0.000 dataset.py:1211(__contains__)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(putmask)
       99    0.000    0.000    0.000    0.000 {method 'pop' of 'set' objects}
        2    0.000    0.000    0.000    0.000 coordinates.py:70(__contains__)
        2    0.000    0.000    0.000    0.000 series.py:498(values)
        1    0.000    0.000    0.000    0.000 variable.py:2334(<listcomp>)
        3    0.000    0.000    0.000    0.000 blocks.py:237(mgr_locs)
        1    0.000    0.000    0.000    0.000 _internal.py:540(consume_until)
        1    0.000    0.000    0.000    0.000 utils.py:588(__dask_tokenize__)
        1    0.000    0.000    0.000    0.000 local.py:182(<listcomp>)
        6    0.000    0.000    0.000    0.000 core.py:121(<genexpr>)
        3    0.000    0.000    0.000    0.000 generic.py:5114(__getattr__)
        2    0.000    0.000    0.000    0.000 construction.py:575(is_empty_data)
        1    0.

In [25]:
cProfile.run("method_3(files)", sort='cumtime')

  


         66849 function calls (66804 primitive calls) in 46.742 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   46.742   46.742 {built-in method builtins.exec}
        1    0.649    0.649   46.742   46.742 <string>:1(<module>)
        1   22.166   22.166   46.093   46.093 <ipython-input-8-82b5311a71a3>:1(method_3)
      198    9.234    0.047   11.856    0.060 core.py:1011(__call__)
      400    9.898    0.025    9.898    0.025 {built-in method numpy.core._multiarray_umath.implement_array_function}
        2    0.000    0.000    7.366    3.683 core.py:6927(concatenate)
        3    0.000    0.000    7.345    2.448 <__array_function__ internals>:2(concatenate)
       99    0.001    0.000    6.271    0.063 core.py:4131(__add__)
       99    0.001    0.000    5.588    0.056 core.py:4165(__mul__)
        2    3.177    1.588    3.308    1.654 core.py:3340(__setitem__)
      198    0.001    0.000   

In [27]:
cProfile.run("method_4(files,100)", sort='cumtime')

/nird/home/yro/temp/temp.0.nc
         5252 function calls (5075 primitive calls) in 93.757 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   93.757   93.757 {built-in method builtins.exec}
        1    0.000    0.000   93.757   93.757 <string>:1(<module>)
        1    0.000    0.000   93.757   93.757 <ipython-input-10-1e025563b5fe>:1(method_4)
        1    0.000    0.000   93.715   93.715 <ipython-input-9-c8e57ab4d66a>:1(cat_in_chunks)
        2    0.000    0.000   93.714   46.857 subprocess.py:331(call)
        4    0.000    0.000   93.285   23.321 subprocess.py:1014(wait)
        4    0.000    0.000   93.285   23.321 subprocess.py:1621(_wait)
        2    0.000    0.000   93.285   46.642 subprocess.py:1608(_try_wait)
        2   93.285   46.642   93.285   46.642 {built-in method posix.waitpid}
        2    0.000    0.000    0.428    0.214 subprocess.py:681(__init__)
        2    0.003    0.0

        8    0.000    0.000    0.000    0.000 {method '__enter__' of '_thread.lock' objects}
        9    0.000    0.000    0.000    0.000 {built-in method posix.fspath}
       11    0.000    0.000    0.000    0.000 {built-in method builtins.hash}
       35    0.000    0.000    0.000    0.000 {method 'items' of 'dict' objects}
        2    0.000    0.000    0.000    0.000 conventions.py:505(<genexpr>)
        6    0.000    0.000    0.000    0.000 variables.py:110(safe_setitem)
        1    0.000    0.000    0.000    0.000 dataset.py:3686(_assert_all_in_dataset)
       27    0.000    0.000    0.000    0.000 variable.py:796(attrs)
        5    0.000    0.000    0.000    0.000 utils.py:418(__init__)
       19    0.000    0.000    0.000    0.000 utils.py:421(__getitem__)
        6    0.000    0.000    0.000    0.000 base.py:540(_reset_identity)
        2    0.000    0.000    0.000    0.000 base.py:569(__array__)
        4    0.000    0.000    0.000    0.000 common.py:422(is_timedelta64_dty

### Test for 1000 years

In [12]:
start_ts, end_ts = 1701, 2500
files = [f"{data_folder}/ocn/hist/NBF1850_f19_tn11_test_mis3b_fwf3b_fram.micom.hy.{year}.nc" for year in range(start_ts, end_ts)]

In [15]:
# cProfile.run("method_1(files)", sort='cumtime')
# Memory error

In [16]:
cProfile.run("method_2(files)", sort='cumtime')

         124203051 function calls (121704045 primitive calls) in 172.779 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000  173.457  173.457 {built-in method builtins.exec}
        1    0.023    0.023  173.457  173.457 <string>:1(<module>)
        1    0.604    0.604  173.011  173.011 <ipython-input-4-aae7034360ec>:3(method_2)
        1    0.000    0.000  172.404  172.404 api.py:724(open_mfdataset)
        1    0.006    0.006  145.714  145.714 api.py:918(<listcomp>)
      799    0.047    0.000  145.708    0.182 api.py:288(open_dataset)
      799    0.049    0.000   90.779    0.114 api.py:450(maybe_decode_store)
      799    0.070    0.000   61.481    0.077 conventions.py:517(decode_cf)
    55131    0.216    0.000   56.255    0.001 netCDF4_.py:360(_acquire)
151938/96807    0.177    0.000   55.724    0.001 contextlib.py:107(__enter__)
311069/200807    0.142    0.000   55.666    0.000 {built-in meth

      870    0.022    0.000    0.082    0.000 coordinates.py:208(<dictcomp>)
   110518    0.082    0.000    0.082    0.000 file_manager.py:157(_optional_lock)
    46392    0.082    0.000    0.082    0.000 {built-in method _hashlib.openssl_md5}
   159800    0.065    0.000    0.081    0.000 inspect.py:158(isfunction)
   195197    0.061    0.000    0.081    0.000 core.py:1045(<genexpr>)
     1598    0.002    0.000    0.081    0.000 <__array_function__ internals>:2(nanargmax)
     9588    0.027    0.000    0.080    0.000 strings.py:222(dtype)
    44744    0.050    0.000    0.080    0.000 dataset.py:1772(selkeys)
   200549    0.077    0.000    0.077    0.000 core.py:1170(dtype)
      799    0.006    0.000    0.076    0.000 indexing.py:493(__getitem__)
     1598    0.008    0.000    0.076    0.000 nanfunctions.py:507(nanargmax)
    15990    0.016    0.000    0.076    0.000 common.py:492(is_interval_dtype)
       50    0.044    0.001    0.075    0.002 core.py:3572(<listcomp>)
    54435    0.0

      799    0.001    0.000    0.006    0.000 indexing.py:48(_expand_slice)
     1598    0.006    0.000    0.006    0.000 _internal.py:827(npy_ctypes_check)
     3196    0.005    0.000    0.006    0.000 _collections_abc.py:672(keys)
     2400    0.006    0.000    0.006    0.000 dataset.py:381(__iter__)
      799    0.006    0.000    0.006    0.000 {method 'filepath' of 'netCDF4._netCDF4.Dataset' objects}
     3197    0.005    0.000    0.005    0.000 {method 'reshape' of 'numpy.ndarray' objects}
    18412    0.005    0.000    0.005    0.000 base.py:1169(name)
      799    0.003    0.000    0.005    0.000 file_manager.py:283(increment)
     7990    0.005    0.000    0.005    0.000 {method 'isunlimited' of 'netCDF4._netCDF4.Dimension' objects}
      800    0.005    0.000    0.005    0.000 dataset.py:3686(_assert_all_in_dataset)
     3192    0.005    0.000    0.005    0.000 {built-in method numpy.zeros}
       51    0.000    0.000    0.005    0.000 algorithms.py:272(_check_object_for_strin

      100    0.000    0.000    0.000    0.000 dtypes.py:165(<genexpr>)
       35    0.000    0.000    0.000    0.000 sre_parse.py:254(get)
       69    0.000    0.000    0.000    0.000 utils.py:630(hashable)
       54    0.000    0.000    0.000    0.000 range.py:683(__len__)
        9    0.000    0.000    0.000    0.000 dataset.py:1211(__contains__)
        1    0.000    0.000    0.000    0.000 algorithms.py:255(_get_values_for_rank)
      128    0.000    0.000    0.000    0.000 {method 'release' of '_thread.lock' objects}
        2    0.000    0.000    0.000    0.000 construction.py:575(is_empty_data)
        1    0.000    0.000    0.000    0.000 function_base.py:1152(diff)
        3    0.000    0.000    0.000    0.000 generic.py:195(__init__)
       37    0.000    0.000    0.000    0.000 sre_parse.py:233(__next)
        2    0.000    0.000    0.000    0.000 series.py:398(_set_axis)
        4    0.000    0.000    0.000    0.000 _collections_abc.py:72(_check_methods)
        1    0.000

In [17]:
cProfile.run("method_2bis(files)", sort='cumtime')

         46317327 function calls (45941262 primitive calls) in 379.856 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      3/1    0.000    0.000  380.117  380.117 {built-in method builtins.exec}
        1    0.000    0.000  380.117  380.117 <string>:1(<module>)
        1    0.661    0.661  380.117  380.117 <ipython-input-5-dfab172b92aa>:1(method_2bis)
        2    0.002    0.001  348.377  174.188 base.py:389(compute)
        2    0.002    0.001  348.235  174.118 threaded.py:33(get)
        2    0.036    0.018  348.228  174.114 local.py:347(get_async)
     3015    0.006    0.000  347.825    0.115 local.py:132(queue_get)
     3015    0.031    0.000  347.819    0.115 queue.py:153(get)
     3022    0.020    0.000  347.758    0.115 threading.py:264(wait)
    15103  347.727    0.023  347.727    0.023 {method 'acquire' of '_thread.lock' objects}
        1    0.005    0.005  259.853  259.853 api.py:724(open_mfdataset)
        1

    22345    0.011    0.000    0.088    0.000 common.py:595(condition)
    69627    0.049    0.000    0.088    0.000 base.py:563(__len__)
   253742    0.086    0.000    0.086    0.000 variable.py:492(dims)
    12782    0.013    0.000    0.084    0.000 common.py:456(is_period_dtype)
    95246    0.052    0.000    0.084    0.000 _collections_abc.py:742(__iter__)
6392/2397    0.012    0.000    0.083    0.000 delayed.py:23(unzip)
80786/80758    0.030    0.000    0.082    0.000 abc.py:141(__subclasscheck__)
       50    0.047    0.001    0.081    0.002 core.py:3572(<listcomp>)
    40749    0.032    0.000    0.081    0.000 concat.py:191(<genexpr>)
     3014    0.052    0.000    0.081    0.000 local.py:248(finish_task)
    40141    0.057    0.000    0.079    0.000 blockwise.py:855(<genexpr>)
   195996    0.060    0.000    0.079    0.000 core.py:2475(<genexpr>)
51237/4096    0.036    0.000    0.078    0.000 utils.py:502(__call__)
     3193    0.004    0.000    0.078    0.000 common.py:598(is_e

       20    0.000    0.000    0.003    0.000 threading.py:834(start)
        3    0.001    0.000    0.003    0.001 core.py:115(<listcomp>)
        1    0.002    0.002    0.003    0.003 algorithms.py:873(rank)
     3995    0.001    0.000    0.003    0.000 core.py:43(has_tasks)
        2    0.001    0.001    0.003    0.001 highlevelgraph.py:199(merge)
    14899    0.003    0.000    0.003    0.000 order.py:690(__lt__)
     4030    0.003    0.000    0.003    0.000 base.py:1169(name)
      103    0.003    0.000    0.003    0.000 {built-in method numpy.empty}
    13913    0.002    0.000    0.002    0.000 {method 'items' of 'dict' objects}
    10393    0.002    0.000    0.002    0.000 utils.py:452(__getitem__)
       17    0.000    0.000    0.002    0.000 pool.py:796(Process)
      880    0.001    0.000    0.002    0.000 dataset.py:1379(coords)
       17    0.000    0.000    0.002    0.000 __init__.py:43(start)
     3022    0.002    0.000    0.002    0.000 threading.py:249(_release_save)
   

        2    0.000    0.000    0.000    0.000 alignment.py:20(_get_joiner)
        1    0.000    0.000    0.000    0.000 coordinates.py:70(__contains__)
        2    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:369(_get_cached)
        1    0.000    0.000    0.000    0.000 {method 'throw' of 'generator' objects}
        1    0.000    0.000    0.000    0.000 function_base.py:1152(diff)
       29    0.000    0.000    0.000    0.000 sre_parse.py:254(get)
        1    0.000    0.000    0.000    0.000 generic.py:328(_construct_axes_dict)
        2    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:951(path_stats)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:1445(path_hook_for_FileFinder)
       33    0.000    0.000    0.000    0.000 base.py:540(_reset_identity)
        2    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:1351(_get_spec)
        2    0.000    0.000    0.000    0

        2    0.000    0.000    0.000    0.000 sre_compile.py:595(isstring)
        1    0.000    0.000    0.000    0.000 _collections_abc.py:392(__subclasshook__)
        2    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:369(__init__)
        2    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap_external>:401(_check_name_wrapper)
        2    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:143(__init__)
        1    0.000    0.000    0.000    0.000 {built-in method _sre.compile}
        1    0.000    0.000    0.000    0.000 {built-in method posix.getpid}
        1    0.000    0.000    0.000    0.000 __init__.py:34(DummyProcess)
        1    0.000    0.000    0.000    0.000 connection.py:18(Listener)
        1    0.000    0.000    0.000    0.000 connection.py:51(Connection)
        1    0.000    0.000    0.000    0.000 api.py:716(__init__)
        3    0.000    0.000    0.000    0.000 merge.py:494(merge_attrs)
        1    0.000    0.000    

MemoryError: Unable to allocate 28.0 MiB for an array with shape (1, 53, 385, 360) and data type float32

In [None]:
cProfile.run("method_2ter(files)", sort='cumtime')

In [None]:
cProfile.run("method_2four(files)", sort='cumtime')

In [13]:
cProfile.run("method_3(files)", sort='cumtime')

  


         217140 function calls in 83.881 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   83.881   83.881 {built-in method builtins.exec}
        1    0.000    0.000   83.881   83.881 <string>:1(<module>)
        1   63.997   63.997   83.881   83.881 <ipython-input-9-82b5311a71a3>:1(method_3)
      324   15.375    0.047   19.725    0.061 core.py:1011(__call__)
      162    0.001    0.000   10.081    0.062 core.py:4131(__add__)
      162    0.001    0.000    9.647    0.060 core.py:4165(__mul__)
      646    4.240    0.007    4.241    0.007 {built-in method numpy.core._multiarray_umath.implement_array_function}
      323    0.001    0.000    4.238    0.013 <__array_function__ internals>:2(copyto)
      972    0.090    0.000    0.090    0.000 {method 'reduce' of 'numpy.ufunc' objects}
      648    0.001    0.000    0.087    0.000 _methods.py:53(_any)
   159068    0.056    0.000    0.056    0.000 

MemoryError: Unable to allocate 56.0 MiB for an array with shape (1, 53, 385, 360) and data type float64

In [14]:
cProfile.run("method_4(files,1000)", sort='cumtime')

/nird/home/yro/temp/temp.0.nc
         8297 function calls (8086 primitive calls) in 764.235 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000  764.235  764.235 {built-in method builtins.exec}
        1    0.000    0.000  764.235  764.235 <string>:1(<module>)
        1    0.000    0.000  764.235  764.235 <ipython-input-11-1e025563b5fe>:1(method_4)
        1    0.000    0.000  764.160  764.160 <ipython-input-10-c8e57ab4d66a>:1(cat_in_chunks)
        2    0.000    0.000  764.160  382.080 subprocess.py:331(call)
        4    0.000    0.000  763.678  190.920 subprocess.py:1014(wait)
        4    0.000    0.000  763.678  190.920 subprocess.py:1621(_wait)
        2    0.000    0.000  763.678  381.839 subprocess.py:1608(_try_wait)
        2  763.678  381.839  763.678  381.839 {built-in method posix.waitpid}
        2    0.000    0.000    0.481    0.241 subprocess.py:681(__init__)
        2    0.002    0

       14    0.000    0.000    0.000    0.000 variables.py:64(dtype)
        3    0.000    0.000    0.000    0.000 variables.py:184(<listcomp>)
       50    0.000    0.000    0.000    0.000 variable.py:492(dims)
       18    0.000    0.000    0.000    0.000 base.py:563(__len__)
        3    0.000    0.000    0.000    0.000 {method 'ncattrs' of 'netCDF4._netCDF4.Variable' objects}
        2    0.000    0.000    0.000    0.000 subprocess.py:875(__del__)
        1    0.000    0.000    0.000    0.000 posixpath.py:232(expanduser)
        1    0.000    0.000    0.000    0.000 file_manager.py:279(__init__)
        1    0.000    0.000    0.000    0.000 lru_cache.py:65(__setitem__)
        1    0.000    0.000    0.000    0.000 dataset.py:857(_construct_direct)
        1    0.000    0.000    0.000    0.000 alignment.py:61(align)
        8    0.000    0.000    0.000    0.000 utils.py:993(__exit__)
        2    0.000    0.000    0.000    0.000 common.py:750(is_signed_integer_dtype)
        2    0.

## Test netcdf