In [None]:
import clisops

# Remove the example file left over from a previous run of this notebook,
# so the cells below start from a clean state.
import os

previous_output = "./output_001.nc"
if os.path.exists(previous_output):
    os.remove(previous_output)

# Subsetting

The subset operation makes use of `clisops.core.subset` to process the datasets and to set the output type and the output file names.

In [None]:
from clisops.ops.subset import subset
import xarray as xr

`subset` takes several parameters:

Parameters
----------
    ds: xr.Dataset
    time: Tuple[dt, dt], optional
    area: Tuple[Union[int, float], Union[int, float], Union[int, float], Union[int, float]], optional
    level: int, optional
    output_dir: Union[str, Path], optional
    output_type: {"netcdf", "nc", "zarr", "xarray"}
    split_method: {"time:auto"}
    file_namer: {"standard", "simple"}
    
    
The output is a list containing the outputs in the format selected.    

In [None]:
# Open all files matching the glob pattern as one dataset,
# combined by their coordinates.
ds = xr.open_mfdataset(
    "tas/*.nc",
    use_cftime=True,
    combine="by_coords",
)

### Output to xarray

In [None]:
# Subset by time range and area; "xarray" keeps the results in memory
# instead of writing them to disk.
outputs = subset(
    ds=ds,
    time=("2007-01-01T00:00:00", "2200-12-30T00:00:00"),
    area=(0.0, 10.0, 175.0, 90.0),
    output_type="xarray",
)

# `subset` returns a list, so report how many items it holds.
print(len(outputs))
print("There is only one output")

# Display the single result as the cell output.
outputs[0]

### Output to netCDF with simple namer

In [None]:
# Same time/area selection as before, but written to a netCDF file in the
# current directory, with output files named by the "simple" namer.
outputs = subset(
    ds=ds,
    time=("2007-01-01T00:00:00", "2200-12-30T00:00:00"),
    area=(0.0, 10.0, 175.0, 90.0),
    output_type="nc",
    output_dir=".",
    split_method="time:auto",
    file_namer="simple",
)

print("There is only one output as the file size is under the memory limit so does not need to be split.")
print("This example uses the simple namer which numbers output files")

In [None]:
# Re-open the numbered netCDF file from the current directory to inspect it.
subset_ds = xr.open_mfdataset(
    "./output_001.nc",
    use_cftime=True,
    combine="by_coords",
)

# Display the re-opened dataset as the cell output.
subset_ds

### Output to netCDF with standard namer

In [None]:
# Same time/area selection, written to netCDF in the current directory, but
# this time the output file name is produced by the "standard" namer.
outputs = subset(
    ds=ds,
    time=("2007-01-01T00:00:00", "2200-12-30T00:00:00"),
    area=(0.0, 10.0, 175.0, 90.0),
    output_type="nc",
    output_dir=".",
    split_method="time:auto",
    file_namer="standard",
)

# The leading newline separates these notes from anything `subset` printed.
print("\nThere is only one output as the file size is under the memory limit so does not need to be split.")
print("This example uses the standard namer which names output files according to the input file and how it has been subsetted.")

### Subsetting by level

In [None]:
# Switch to a different input: open all files matching the cmip6 glob
# pattern as one dataset (this rebinds `ds` for the cells that follow).
ds = xr.open_mfdataset(
    "cmip6/*.nc",
    use_cftime=True,
    combine="by_coords",
)

#### No subsetting applied

In [None]:
# Call `subset` without time, area or level arguments to get a baseline
# for comparison with the level subset.
result = subset(ds=ds, output_type="xarray")

# Show the coordinates of the first (in-memory) result.
result[0].coords

#### Subsetting over level

In [None]:
# Subset over the level coordinate (plev), passing the range as a
# "start/end" string.
result = subset(ds=ds, level="600/100", output_type="xarray")

# Print the coordinates so the reduced plev axis can be compared with the
# unsubsetted result.
print(result[0].coords)

print("\nplev has been subsetted and now only has 2 values")