## Compute monthly sea-ice concentration climatologies from EUMETSAT OSI SAF data

We compute monthly climatologies (1981-2010, 1991-2020) from EUMETSAT OSI SAF SIC CDR v3 data. This uses xarray.

**NB:** This software is for demonstration purposes only; it is not finalized for production.

In [1]:
import os
import xarray as xr

from datetime import date, datetime, timedelta, time
from dateutil import rrule, relativedelta

import cftime

import json
import uuid
from copy import copy
import numpy as np

Configuration of the run

In [2]:
# area code inserted into the input and output file names
area = 'nh'
# output directory setting (presumably where the climatology file should go -- confirm against the write step)
outdir = '.'
# path to a JSON file mapping each data source to its input directory;
#   when set to None the inputs are read through THREDDS/OPeNDAP instead
indirs = 'osisaf_sic_cdr_storeB.json'

# climo : years to include in the climatology run. **Both ends are included**.
climo = (1991, 2000)

In [3]:
# input monthly SIC files: file name pattern (area, source tag, year+month) and the
#   tag used in the file name for each data source
fn_patt = 'ice_conc_{a:}_ease2-250_{c:}_{d:%Y%m}.nc'
fn_patt_src = {'cdr': 'cdr-v3p0', 'icdr': 'icdr-v3p0', 'icdrft': 'icdrft-v3p0'}

if indirs is None:
    # access through THREDDS/OpenDAP
    inpdir_cdr = 'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/monthly/'
    inpdir_icdr = 'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_cra_files/monthly/'
    sources = {'cdr':inpdir_cdr,
              'icdr':inpdir_icdr,
              'icdrft':inpdir_icdr}
    jsond = json.dumps(sources, sort_keys=True, indent=4)
else:
    # load json file with path to input directories (if the SIC files are downloaded to a local disk)
    # an example json file (prepare_monthly_osisaf_sic_opendap.json) is provided to demonstrate the format
    #   expected for the json file (but the effect will be the same as setting indirs to None: read from
    #   THREDDS/opendap)
    with open(indirs, 'r') as f:
        sources = json.load(f)
    # The monthly files live in a 'monthly' sub-directory of each configured directory.
    #   Strip any trailing '/' first so that the result never contains '//monthly/'.
    for s in sources:
        sources[s] = sources[s].rstrip('/') + '/monthly/'

In [4]:
def find_one_monthly_sic_file(dt, area):
    """Locate the monthly SIC file for one date.

    The 'cdr', 'icdr' and 'icdrft' sources are tried in that order. For each
    one the candidate path is built from the source directory, a 'YYYY/'
    sub-directory and the file name pattern, and the first candidate that
    xarray manages to open is returned.

    Parameters
    ----------
    dt : date-like
        Date of the month to look for (must support '%Y' / '%Y%m' formatting).
    area : str
        Area code inserted into the file name.

    Returns
    -------
    tuple
        (path, source key) of the first file found, or (None, None) when no
        source provides a file (a warning is printed in that case).
    """
    for cdr in ('cdr', 'icdr', 'icdrft'):
        fn = fn_patt.format(a=area, d=dt, c=fn_patt_src[cdr])
        fn = os.path.join(sources[cdr], '{:%Y/}'.format(dt), fn)
        try:
            # Opening is only a probe for existence: close the handle right away
            #   instead of leaving one dataset open per probed file.
            with xr.open_dataset(fn):
                pass
        except OSError:
            # no valid file at this url, check the next source
            continue
        return fn, cdr

    # no file found. Add a warning (but we can continue)
    print("WARNING: could not find OSI SAF SIC v3 file for {} {}".format(area, dt))
    return None, None

def find_climo_monthly_sic_file(year_start, year_end, area):
    """Collect the monthly SIC files for every month of a range of years.

    Each month from January of `year_start` to December of `year_end` (both
    years included) is looked up with find_one_monthly_sic_file(), using the
    15th of the month as the lookup date. Months without a file are skipped.

    Returns the list of paths found, in chronological order.
    """
    lookup_dates = (
        date(yr, mon, 15)
        for yr in range(year_start, year_end + 1)
        for mon in range(1, 13)
    )
    # only the path is needed here; the source key returned alongside it is ignored
    candidates = (find_one_monthly_sic_file(day, area)[0] for day in lookup_dates)
    return [path for path in candidates if path]

# Gather the input files of the whole climatology period and stop if there are none
climo_fns = find_climo_monthly_sic_file(year_start=climo[0], year_end=climo[1], area=area)
if not climo_fns:
    raise ValueError("Found no files for building the climatology by month.")

# Report the number of files, showing only the first and the last path
print("Open {} monthly files:".format(len(climo_fns)))
print(climo_fns[0], ' ... ', ' ... ', climo_fns[-1], sep='\n')

Open 120 monthly files:
/lustre/storeB/project/copernicus/osisaf/data/reprocessed/ice/conc/v3p0//monthly/1991/ice_conc_nh_ease2-250_cdr-v3p0_199101.nc
 ... 
 ... 
/lustre/storeB/project/copernicus/osisaf/data/reprocessed/ice/conc/v3p0//monthly/2000/ice_conc_nh_ease2-250_cdr-v3p0_200012.nc


In [5]:
# Open the first input file in raw form (no masking/scaling, no time decoding), to
#   extract information like dtype, _FillValue, time units, etc...
dso = xr.open_dataset(
    climo_fns[0],
    decode_times=False,
    mask_and_scale=False,
)

## Climatology

In [6]:
# A routine to reconstruct the unfiltered, unthresholded SICs from a OSI SAF SIC CDR file
def reconstruct_sic(ds):
    """Rebuild 'ice_conc' by merging in values from 'raw_ice_conc_values'.

    Parameters
    ----------
    ds : dataset
        Must hold the variables 'ice_conc', 'raw_ice_conc_values' and
        'status_flag'.

    Returns
    -------
    The same `ds` object: its 'ice_conc' variable is overwritten in place.
    """
    # Pull the three variables out of the dataset as numpy masked arrays
    ice_conc = ds['ice_conc'].to_masked_array()
    raw_ice_conc_values = ds['raw_ice_conc_values'].to_masked_array()
    status_flag = ds['status_flag'].to_masked_array().astype('short')
    
    # combine ice_conc with raw_ice_conc_values using the status_flag
    new_ice_conc = copy(ice_conc)
    # Where ice_conc is exactly 100 and a raw value is available (not masked), use the raw value
    #   (presumably these are values that were capped at 100% -- TODO confirm against the product manual)
    raw_100_mask = np.array((ice_conc==100) * (~raw_ice_conc_values.mask))
    new_ice_conc[raw_100_mask] = raw_ice_conc_values[raw_100_mask]
    
    # Where bit value 4 of status_flag is set, use the raw value as well
    #   (NOTE(review): the meaning of this flag bit is not visible here -- confirm against the product manual)
    new_ice_conc[(status_flag & 4) == 4] = raw_ice_conc_values[(status_flag & 4) == 4]

    # re-enter "full" ice_conc into the xarray dataset
    ds['ice_conc'][:] = new_ice_conc
    return ds

In [7]:
# Combine all the monthly files of the climatology period into a single dataset, and
#   put the unfiltered, unthresholded concentrations back into it
climo_ds = reconstruct_sic(xr.open_mfdataset(climo_fns, engine='netcdf4'))

In [8]:
# remove some variables and attributes before computing the climatology
_dropped_vars = ['lat', 'lon', 'raw_ice_conc_values', 'status_flag', 'Lambert_Azimuthal_Grid', 'time_bnds']
climo_ds = climo_ds.drop_vars(_dropped_vars)

# these attributes of ice_conc are removed one by one (a missing one raises KeyError)
for _dropped_attr in ('valid_min', 'valid_max', 'ancillary_variables', 'comment'):
    del climo_ds['ice_conc'].attrs[_dropped_attr]

In [9]:
# group the monthly fields by month, and compute the statistics
# (the time axis is first put in a single chunk; presumably this is needed so that
#  the quantile computation sees the whole time series at once -- TODO confirm)
climo_ds = climo_ds.chunk({'time': -1})
climo_grpby = climo_ds.groupby('time.month')

# compute mean, sdev, and all quantiles (include median, min, and max)
quantiles = np.array([50, 0, 100, 5, 10, 25, 75, 90, 95])/100.

# mean over the years, for each month
ds_mean = climo_grpby.mean(skipna=True, keep_attrs=True)
ds_mean = ds_mean.rename_vars({'ice_conc':'ice_conc_mean'})
ds_mean['ice_conc_mean'].attrs['cell_methods'] = 'time: mean within years time: mean over years'
ds_mean['ice_conc_mean'].attrs['long_name'] = 'mean sea-ice concentration over the climatological period'

# standard deviation over the years, for each month
ds_sdev = climo_grpby.std(skipna=True, keep_attrs=True)
ds_sdev = ds_sdev.rename_vars({'ice_conc':'ice_conc_sdev'})
ds_sdev['ice_conc_sdev'].attrs['cell_methods'] = 'time: mean within years time: standard_deviation over years'
ds_sdev['ice_conc_sdev'].attrs['long_name'] = 'standard deviation of sea-ice concentration over the climatological period'

ds_climo = xr.merge([ds_mean, ds_sdev], compat='override')

# all quantiles are computed in one call, then split into one variable per quantile
ds_quantiles = climo_grpby.quantile(quantiles, skipna=True, keep_attrs=True)
for q in quantiles:
    # suff : suffix of the variable name, meth : name used in cell_methods,
    # lname : start of the long_name (defaults to the capitalized method name)
    lname = None
    if q == 0:
        suff = 'min'
        meth = 'minimum'
    elif q == 1:
        suff = 'max'
        meth = 'maximum'
    elif q == 0.5:
        suff = 'median'
        meth = 'median'
    else:
        qpct = round(q*100)
        suff = '{:d}pctile'.format(qpct)
        meth = 'percentile_{:d}'.format(qpct)
        lname = '{}% Percentile'.format(qpct)

    if lname is None:
        lname = meth.capitalize()

    # extract this quantile, rename its variable and document it
    ds_quant = ds_quantiles.sel(quantile=q)
    ds_quant = ds_quant.rename_vars({'ice_conc':'ice_conc_'+suff})
    ds_quant['ice_conc_'+suff].attrs['cell_methods'] = 'time: mean within years time: {} over years'.format(meth,)
    ds_quant['ice_conc_'+suff].attrs['long_name'] = lname + ' of sea-ice concentration over the climatological period'

    ds_climo = xr.merge([ds_climo, ds_quant], compat='override')

# the 'quantile' coordinate left over from the selection is not needed in the result
ds_climo = ds_climo.drop_vars(['quantile',])

In [10]:
# Prepare the time coordinate: the 16th of each month, in the middle year of the period
mid_year = int(round(0.5 * (climo[0] + climo[1])))
mid_time = [cftime.datetime(mid_year, month, 16) for month in range(1, 13)]

# Start from the time attributes of the original file, leaving out the ones starting
#   with '_' as well as 'bounds', 'units' and 'calendar'
time_attrs = {
    key: value
    for key, value in dso['time'].attrs.items()
    if not (key.startswith('_') or key in ('bounds', 'units', 'calendar'))
}
mid_time_da = xr.DataArray(mid_time, dims="month", coords={'month': range(1,13)}, name='time',)

# Assign the time dimension and swap the dimension from 'month' to 'time'
ds_climo = (
    ds_climo
    .assign_coords(time=mid_time_da)
    .swap_dims({"month": "time"})
    .drop_vars(('month',))
)

# add climatological time bounds: from the start of the month in the first year
#   to the start of the following month in the last year (December rolls over
#   to January of the year after)
bound0 = [cftime.datetime(climo[0], month, 1) for month in range(1, 13)]
bound1 = [cftime.datetime(climo[1] + month // 12, month % 12 + 1, 1) for month in range(1, 13)]
bounds = np.column_stack([bound0, bound1])
bounds_da = xr.DataArray(bounds, dims=['time','nv'], coords={'time': mid_time, },)
ds_climo = ds_climo.assign(climatology_bnds=bounds_da)

# fix attributes of the time variable
time_attrs['climatology'] = 'climatology_bnds'
ds_climo['time'].attrs = time_attrs

In [11]:
# bring lat, lon (as coordinates) and the grid mapping variable back from the original file
ds_climo = (
    ds_climo
    .assign_coords(lat=dso['lat'])
    .assign_coords(lon=dso['lon'])
    .assign(Lambert_Azimuthal_Grid=dso['Lambert_Azimuthal_Grid'])
)

In [12]:
# limit every ice_conc_* variable to the 0 - 100% range
_names_to_clip = [name for name in ds_climo.variables if name.startswith('ice_conc_')]
for v in _names_to_clip:
    ds_climo[v] = ds_climo[v].clip(min=0, max=100)

In [13]:
# add x and y bounds. This should not be necessary here, these bounds should already have been present
#   in the monthly sea-ice concentration files
for ac in ('xc', 'yc'):
    centres = ds_climo[ac].values
    # signed grid spacing, taken from the first two points of the axis
    diff = centres[1] - centres[0]
    resol = abs(diff)
    sign = diff / resol
    # each cell extends half a grid spacing on either side of its centre, with the
    #   bounds ordered in the same direction as the axis itself
    half_cell = 0.5*sign*resol
    bnds = np.column_stack((centres - half_cell, centres + half_cell))
    ds_climo[ac + '_bnds'] = xr.DataArray(bnds, dims=(ac,'nv'))
    ds_climo[ac].attrs['bounds'] = ac + '_bnds'

### Write Climatology to a netCDF file

Xarray datasets can be written to netCDF files with the to_netcdf() method.

**Note: this is still a preliminary format.**

In [14]:
def get_first_last_date_climo(climo, dt):
    """Return the first and last day covered by the month of `dt` over a period.

    The first day is the 1st of that month in year climo[0]; the last day is
    the final day of that month in year climo[1].
    """
    month_start_first_year = date(climo[0], dt.month, 1)
    month_start_last_year = date(climo[1], dt.month, 1)
    # last day of the month = day before the 1st of the following month
    following_month = month_start_last_year + relativedelta.relativedelta(months=1)
    return month_start_first_year, following_month - timedelta(days=1)

def get_first_last_date_month(dt):
    """Return the first and last day of the month that `dt` falls in."""
    month_start = date(dt.year, dt.month, 1)
    # last day of the month = day before the 1st of the following month
    following_month = month_start + relativedelta.relativedelta(months=1)
    return month_start, following_month - timedelta(days=1)


In [15]:
# Amend some attributes
# creation time (UTC, truncated to the second), used for 'history' and 'date_created'
now = datetime.utcnow().replace(microsecond=0)
ds_climo.attrs['history'] = now.isoformat()+'Z' + ' creation'
ds_climo.attrs['date_created'] = now.date().isoformat()

# give the climatology file its own identifier
ds_climo.attrs['tracking_id'] = str(uuid.uuid4())

if ds_climo.attrs['title'].startswith('Monthly'):
    ds_climo.attrs['title'] = ds_climo.attrs['title'].replace('Monthly','Monthly Climatology ({}-{}) of'.format(climo[0], climo[1]))

ds_climo.attrs['product_status'] = 'under development'

# Remove attributes inherited from the monthly files. Each one is removed
#   independently, so that an absent attribute does not prevent the removal
#   of the following ones.
for _attr in ('doi', 'naming_authority', 'product_id', 'product_name', 'algorithm'):
    ds_climo.attrs.pop(_attr, None)

#ISO 8601 time attributes are not straightforward for a monthly climatology period. We remove them.
for _attr in ('time_coverage_start', 'time_coverage_end',
              'time_coverage_duration', 'time_coverage_resolution'):
    ds_climo.attrs.pop(_attr, None)


In [16]:
# encoding
# 'time' and 'climatology_bnds' share the dtype, units and calendar of the original time variable
_time_encoding = {'dtype':dso['time'].dtype, 'units':dso['time'].units,
                  'calendar':dso['time'].calendar, '_FillValue':None}
encoding = {'time': dict(_time_encoding), 'climatology_bnds': dict(_time_encoding)}

# coordinates and bounds are written without a _FillValue
for _name in ('lat', 'lon', 'xc', 'yc', 'xc_bnds', 'yc_bnds'):
    encoding[_name] = {'_FillValue':None}
encoding['Lambert_Azimuthal_Grid'] = {'_FillValue':None, 'dtype':dso['Lambert_Azimuthal_Grid'].dtype}

# every climatology variable is written as float32 with -999 as _FillValue
climo_vars = [v for v in ds_climo.variables if v.startswith('ice_conc_')]
for v in climo_vars:
    encoding[v] = {'dtype':np.float32, '_FillValue':np.float32(-999.)}

In [17]:
# reorder the variables in the dataset before writing to disk:
#   grid mapping and time first, then the spatial axes, then the climatology fields
ordered_variables = (
    ['Lambert_Azimuthal_Grid', 'time', 'climatology_bnds']
    + ['xc', 'yc', 'xc_bnds', 'yc_bnds', 'lat', 'lon']
    + climo_vars
)
# the ordered list must account for as many variables as the dataset holds
if len(ds_climo.variables) != len(ordered_variables):
    raise ValueError("Missing some variables!")

# rebuild the dataset variable by variable in the requested order, keeping the global attributes
ds_climo_2 = xr.Dataset(
    {var_name: ds_climo[var_name] for var_name in ordered_variables},
    attrs=ds_climo.attrs,
)

In [18]:
# show a text summary of the reordered dataset before it is written to disk
print(ds_climo_2)

<xarray.Dataset>
Dimensions:                 (time: 12, nv: 2, xc: 432, yc: 432)
Coordinates:
  * time                    (time) object 1996-01-16 00:00:00 ... 1996-12-16 ...
  * xc                      (xc) float64 -5.388e+03 -5.362e+03 ... 5.388e+03
  * yc                      (yc) float64 5.388e+03 5.362e+03 ... -5.388e+03
    lon                     (yc, xc) float32 -135.0 -135.1 -135.3 ... 44.87 45.0
    lat                     (yc, xc) float32 16.62 16.82 17.02 ... 16.82 16.62
Dimensions without coordinates: nv
Data variables: (12/15)
    Lambert_Azimuthal_Grid  int32 ...
    climatology_bnds        (time, nv) object 1991-01-01 00:00:00 ... 2001-01...
    xc_bnds                 (xc, nv) float64 -5.4e+03 -5.375e+03 ... 5.4e+03
    yc_bnds                 (yc, nv) float64 5.4e+03 5.375e+03 ... -5.4e+03
    ice_conc_mean           (time, yc, xc) float64 dask.array<chunksize=(1, 432, 432), meta=np.ndarray>
    ice_conc_sdev           (time, yc, xc) float64 dask.array<chunksize=(1, 4

In [19]:
# write to netCDF/CF

## Note : this filename is still just a proposal
# The file goes into the configured output directory (with outdir = '.', this gives
#   the same './...' path as before).
outname = os.path.join(outdir, 'ice_conc_{}_ease2-250_climatology-v3p0_{}-{}.nc'.format(area, climo[0], climo[1]))

# the per-variable encoding (dtype, _FillValue, time units/calendar) is applied at write time
ds_climo_2.to_netcdf(outname, encoding=encoding)

print(outname, "is ready.")

  x = np.divide(x1, x2, out)
  result = np.apply_along_axis(_nanquantile_1d, axis, a, q,


./ice_conc_nh_ease2-250_climatology-v3p0_1991-2000.nc is ready.
