In [2]:
import xarray as xr
import yaml
import numpy as np
import pathlib as pl

# NOTE(review): `snakemake` is not defined anywhere in this notebook; it is
# presumably injected by Snakemake's notebook/script integration -- confirm.

# Sort the input paths so the files are always processed in the same order.
paths = sorted(snakemake.input.paths)

# Wildcards used to build the output file names.
variable = snakemake.wildcards.variable
model = snakemake.wildcards.model
experiment = snakemake.wildcards.experiment

# The yearly files are written into the directory that holds the YAML
# tracker file.  `Path.parent` replaces the manual split/join on '/', which
# resolved a root-level output such as '/tracker.yaml' to '.' instead of '/'.
outdir = pl.Path(snakemake.output.outpath).parent

# Range of years to export; falls back to 1990 onwards (no upper bound) when
# the rule does not define a `time_slice` parameter.
time_slice = snakemake.params.get('time_slice', slice(1990, None))


In [3]:
# Template for the names of the yearly output files.  model, experiment and
# variable are substituted here; the doubled braces keep a literal `{freq}`
# placeholder in the string, which split_into_yearly_files later fills with
# the year through `.format(freq=year)`.
file_ending_fmt = f'aerocom3_{model}_{experiment}_{variable}_Column_{{freq}}_monthly.nc'

In [7]:
def split_into_yearly_files(paths: list, outdir: pl.Path,
                            time_slice: slice, file_ending_fmt: str) -> dict:
    """Split every input dataset into one netCDF file per calendar year.

    Parameters
    ----------
    paths : list
        Paths of the input datasets; each one is opened with
        ``xr.open_dataset``.
    outdir : pl.Path
        Directory that receives the yearly files.  It is created (including
        parents) when it does not exist yet.
    time_slice : slice
        Range of years to export.  Both ends are inclusive; a ``start`` or
        ``stop`` of ``None`` leaves that side unbounded.
    file_ending_fmt : str
        File-name template containing a ``{freq}`` placeholder, which is
        replaced by the year.

    Returns
    -------
    dict
        Maps the file name (last path component) of every input to the list
        of output file names written for it, in ascending year order.  Inputs
        without any year inside ``time_slice`` map to an empty list.

    Notes
    -----
    The output name depends only on the year, so if the same year occurs in
    more than one input file the later file overwrites the earlier output.
    """
    outdir = pl.Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    # Compare against None explicitly so that a bound of 0 is honoured
    # instead of being mistaken for "not set".
    first_year = time_slice.start
    last_year = time_slice.stop

    file_tracker = {}
    for path in paths:
        written = []
        file_tracker[pl.Path(path).name] = written

        # Open one dataset at a time and close it as soon as its years are
        # written, rather than keeping every input file open at once.
        with xr.open_dataset(path) as ds:
            for year in np.unique(ds.time.dt.year.values):
                year = int(year)
                if first_year is not None and year < first_year:
                    continue
                if last_year is not None and year > last_year:
                    continue

                out_file = file_ending_fmt.format(freq=year)
                # Selecting with the year as a string picks every time step
                # of that year.
                ds.sel(time=str(year)).to_netcdf(outdir.joinpath(out_file))
                written.append(out_file)

    return file_tracker
            
            
            

In [8]:
# Write the yearly files and keep the returned mapping from input file name
# to the output file names produced for it.
file_tracker = split_into_yearly_files(
    paths=paths,
    outdir=outdir,
    time_slice=time_slice,
    file_ending_fmt=file_ending_fmt,
)

In [9]:
# Store the tracker mapping as block-style YAML at the rule's declared
# output path.
with open(snakemake.output.outpath, mode='w') as tracker_file:
    yaml.safe_dump(file_tracker, stream=tracker_file, default_flow_style=False)