# Compute monthly SIC fields from OSI SAF data

In [1]:
import os
import uuid
from copy import copy
from datetime import date, datetime, timedelta, time, timezone

import numpy as np
import xarray as xr
from dateutil import rrule, relativedelta

from matplotlib import pylab as plt
from matplotlib import cm


In [2]:
# Any day inside the month to process: only its year and month are read when
# locating the daily files and when naming the monthly file.
dt = date(year=2005, month=1, day=12)

# Area tag substituted into the daily and monthly file names.
# NOTE(review): the saved cell outputs further down list 'sh' file names, so they
# were produced with a different value than the one set here -- re-run to refresh.
area = "nh"

### Find all SIC files for a month

In [3]:
# Daily SIC record types, as  name -> (OPeNDAP base URL, tag used in the file name).
# Insertion order matters: find_sic_files() walks the entries in this order.
sources = dict(
    cdr=(
        'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/',
        'cdr-v3p0',
    ),
    icdr=(
        'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_cra_files/',
        'icdr-v3p0',
    ),
    icdrft=(
        'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_cra_files/',
        'icdrft-v3p0',
    ),
)

# Daily file name template: a = area tag, c = record tag, d = date of the file.
fn_patt = 'ice_conc_{a:}_ease2-250_{c:}_{d:%Y%m%d}1200.nc'

def get_first_last_date(dt):
    """Return the first and last calendar day of the month that contains ``dt``.

    Only ``dt.year`` and ``dt.month`` are read. The result is the tuple
    ``(first_day, last_day)``.
    """
    month_start = date(dt.year, dt.month, 1)
    # jump to the first day of the following month, then step back one day
    next_month_start = month_start + relativedelta.relativedelta(months=1)
    month_end = next_month_start - timedelta(days=1)
    return month_start, month_end

def find_sic_files(dt, area):
    """Locate the daily SIC files for the month that contains ``dt``.

    For every day of the month, the entries of the module-level ``sources``
    mapping are tried in their insertion order and the first URL that can be
    opened is selected, so each day contributes at most one file. Days for
    which no URL can be opened are reported with a printed warning and skipped.

    Parameters
    ----------
    dt : date or datetime
        Any day in the month of interest (only year and month are used).
    area : str
        Area tag substituted into the file name template ``fn_patt``.

    Returns
    -------
    files : list of str
        URL of the selected file for each day where one was found.
    srcs : list of str
        Key of ``sources`` each selected file came from, aligned with ``files``.
    """
    files = []
    srcs = []
    # iterate over all days in the month
    first_day, last_day = get_first_last_date(dt)
    for d in rrule.rrule(rrule.DAILY, dtstart=first_day, until=last_day):
        # find the path/url to the file. The order of `sources` is the
        #   precedence rule for what type of file to select.
        for cdr, (base_url, tag) in sources.items():
            fn = fn_patt.format(a=area, d=d, c=tag)
            fn = os.path.join(base_url, '{:%Y/%m/}'.format(d), fn)
            try:
                # Opening is only an existence check: release the handle right
                #   away instead of leaving one open dataset per probed URL.
                xr.open_dataset(fn).close()
            except OSError:
                # no valid file at this url, check the next rule
                continue
            # this url exists: keep it and stop looking for this date, so that
            #   a day available from several sources is not added twice.
            files.append(fn)
            srcs.append(cdr)
            break
        else:
            # no file found. Add a warning (but we can continue)
            print("WARNING: could not find OSI SAF SIC v3 file for {} {}".format(area, d.date()))

    return files, srcs

# Resolve the daily files for the configured month and area, then show the
# selected URLs followed by the record type each one came from.
files, srcs = find_sic_files(dt, area)
for listing in (files, srcs):
    print(listing)

['https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/2005/01/ice_conc_sh_ease2-250_cdr-v3p0_200501011200.nc', 'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/2005/01/ice_conc_sh_ease2-250_cdr-v3p0_200501021200.nc', 'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/2005/01/ice_conc_sh_ease2-250_cdr-v3p0_200501031200.nc', 'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/2005/01/ice_conc_sh_ease2-250_cdr-v3p0_200501041200.nc', 'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/2005/01/ice_conc_sh_ease2-250_cdr-v3p0_200501051200.nc', 'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/2005/01/ice_conc_sh_ease2-250_cdr-v3p0_200501061200.nc', 'https://thredds.met.no/thredds/dodsC/osisaf/met.no/reprocessed/ice/conc_450a_files/2005/01/ice_conc_sh_ease2-250_cdr-v3p0_200501071200.nc', 'https://thr

### Open access to all the files

In [4]:
# Open all the daily files as a single dataset and print its summary.
ds = xr.open_mfdataset(files)
print(ds)

<xarray.Dataset>
Dimensions:                         (time: 31, nv: 2, xc: 432, yc: 432)
Coordinates:
  * time                            (time) datetime64[ns] 2005-01-01T12:00:00...
  * xc                              (xc) float64 -5.388e+03 ... 5.388e+03
  * yc                              (yc) float64 5.388e+03 ... -5.388e+03
    lat                             (yc, xc) float32 dask.array<chunksize=(432, 432), meta=np.ndarray>
    lon                             (yc, xc) float32 dask.array<chunksize=(432, 432), meta=np.ndarray>
Dimensions without coordinates: nv
Data variables:
    Lambert_Azimuthal_Grid          (time) int32 -2147483647 ... -2147483647
    time_bnds                       (time, nv) datetime64[ns] dask.array<chunksize=(1, 2), meta=np.ndarray>
    ice_conc                        (time, yc, xc) float64 dask.array<chunksize=(1, 432, 432), meta=np.ndarray>
    raw_ice_conc_values             (time, yc, xc) float64 dask.array<chunksize=(1, 432, 432), meta=np.ndarray>
   

### Compute monthly average SIC

This requires re-combining the filtered SIC in 'ice_conc' with the unfiltered values in 'raw_ice_conc_values'. We use the 'status_flag' variable.

In [5]:
# Pull the three fields out of the dataset as numpy masked arrays.
ice_conc = ds['ice_conc'].to_masked_array()
raw_ice_conc_values = ds['raw_ice_conc_values'].to_masked_array()
status_flag = ds['status_flag'].to_masked_array().astype('short')

# combine ice_conc with raw_ice_conc_values using the status_flag:
#   1) cells where ice_conc is exactly 100 take the raw value
at_100 = ice_conc == 100
ice_conc[at_100] = raw_ice_conc_values[at_100]
#   2) cells whose status_flag has the bit of value 4 set take the raw value
# NOTE(review): value 4 is presumably the open-water-filter flag -- confirm
#   against the product documentation.
flag_4_set = (status_flag & 4) == 4
ice_conc[flag_4_set] = raw_ice_conc_values[flag_4_set]

# re-enter ice_conc into the xarray dataset
ds['ice_conc'][:] = ice_conc

Use xarray to compute the mean over time (now that we replaced the ice_conc)

In [6]:
# Average every variable along the 'time' dimension, keeping the attributes.
ds_month = ds.mean(dim='time', keep_attrs=True)
ds_month

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (432, 432) (432, 432) Count 150 Tasks 1 Chunks Type float32 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (432, 432) (432, 432) Count 150 Tasks 1 Chunks Type float32 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,198 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (432, 432) (432, 432) Count 198 Tasks 1 Chunks Type float64 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,198 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (432, 432) (432, 432) Count 135 Tasks 1 Chunks Type float64 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (432, 432) (432, 432) Count 135 Tasks 1 Chunks Type float64 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (432, 432) (432, 432) Count 135 Tasks 1 Chunks Type float64 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (432, 432) (432, 432) Count 135 Tasks 1 Chunks Type float64 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (432, 432) (432, 432) Count 135 Tasks 1 Chunks Type float32 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,135 Tasks,1 Chunks
Type,float32,numpy.ndarray


Re-arrange SIC values into 'ice_conc' and 'raw_ice_conc_values'

In [7]:
ice_conc = ds_month['ice_conc'].to_masked_array()

# Start raw_ice_conc_values as an array of zeros (same shape and dtype as
# ice_conc) in which every cell is masked; cells are filled in below.
zero_field = np.zeros_like(ice_conc).astype(ice_conc.dtype)
everything_masked = np.ones_like(ice_conc).astype('bool')
raw_ice_conc_values = np.ma.array(zero_field, mask=everything_masked)

# Monthly means above 100: remember the value in raw_ice_conc_values, clip ice_conc to 100.
above_100 = ice_conc > 100
raw_ice_conc_values[above_100] = ice_conc[above_100]
ice_conc[above_100] = 100

# Monthly means below 10: remember the value in raw_ice_conc_values, set ice_conc to 0.
below_10 = ice_conc < 10
raw_ice_conc_values[below_10] = ice_conc[below_10]
ice_conc[below_10] = 0

# store ice_conc and raw_ice_conc_values back in the xarray structure
ds_month['raw_ice_conc_values'][:] = raw_ice_conc_values
ds_month['ice_conc'][:] = ice_conc

Prepare a simplified 'status_flag' variable. We only keep '1' (land) and '128' (climatology). The rest we set to '0' (nominal). 

In [8]:
# Time-averaged status_flag, cast to short integers.
status_flag = ds_month['status_flag'].to_masked_array().astype('short')

# Every cell whose value is neither 1 nor 128 is reset to 0.
not_1 = status_flag != 1
not_128 = status_flag != 128
index = not_1 * not_128
status_flag[index] = 0

ds_month['status_flag'][:] = status_flag

### Clean the xarray dataset structure and write to file

In [9]:
# The three uncertainty variables are removed from the monthly dataset.
uncertainty_vars = [
    'total_standard_uncertainty',
    'smearing_standard_uncertainty',
    'algorithm_standard_uncertainty',
]
ds_month = ds_month.drop_vars(uncertainty_vars)

The filename should reflect what type of daily SIC files were used as input. If only 'cdr' files were used, the monthly file should have 'cdr' in its name. If only 'icdr' files were used, the monthly file should have 'icdr' in its name. If some fast-track ICDR files ('icdrft') were used, the monthly file should have 'icdrft' in its name.

In [10]:
# Amend some attributes
first_day, last_day = get_first_last_date(dt)
# The coverage runs from the first day of the month at 00:00 to the first day
#   of the following month at 00:00 (last day + 1 day).
ds_month.attrs['time_coverage_start'] = datetime.combine(first_day, time()).isoformat()+'Z'
ds_month.attrs['time_coverage_end'] = (datetime.combine(last_day,time())+timedelta(days=1)).isoformat()+'Z'

ds_month.attrs['time_coverage_duration'] = 'P1M'
ds_month.attrs['time_coverage_resolution'] = 'P1M'

# Current UTC time. datetime.utcnow() is deprecated (Python 3.12), so take an
#   aware UTC time and drop the tzinfo: the 'Z' suffix is appended by hand below.
now = datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)
ds_month.attrs['history'] = now.isoformat()+'Z' + ' creation'
ds_month.attrs['date_created'] = now.date().isoformat()

ds_month.attrs['tracking_id'] = str(uuid.uuid4())

# Prefix the title only once, so that re-running this cell does not stack
#   'Monthly Monthly ...'.
if not ds_month.attrs['title'].startswith('Monthly '):
    ds_month.attrs['title'] = 'Monthly ' + ds_month.attrs['title']

In [11]:
# Re-open the first daily file without decoding (no masking/scaling, no time
# decoding) to read its on-disk dtype, _FillValue, time units, etc...
dso = xr.open_dataset(files[0], decode_times=False, mask_and_scale=False)
dso



Add 'time' dimension and coordinate variable (the 16th of the month).

In addition, add the 'time_bnds' variable, which requires the 'nv' dimension. time_bnds[0] gets the first day of the month (0 utc), time_bnds[1] gets the first day of the following month (0 utc).

This turned out to be a bit messy, suggestions for improvements are welcome.

In [12]:
# Prepare the time_bnds variable [start, end] and add it as a data variable to the dataset.
# The coverage attributes end with a 'Z' that np.datetime64 should not receive, hence [:-1].
bnds_start = np.datetime64(ds_month.attrs['time_coverage_start'][:-1])
bnds_end = np.datetime64(ds_month.attrs['time_coverage_end'][:-1])
time_bnds = [bnds_start, bnds_end]

# Only name the 'nv' dimension. Passing coords as (name, values) pairs would also
#   create an 'nv' coordinate variable that then has to be dropped from the dataset.
time_bnds_da = xr.DataArray(time_bnds, dims=('nv',))

ds_month = ds_month.assign(time_bnds=time_bnds_da)
ds_month

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (432, 432) (432, 432) Count 150 Tasks 1 Chunks Type float32 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (432, 432) (432, 432) Count 150 Tasks 1 Chunks Type float32 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,200 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (432, 432) (432, 432) Count 200 Tasks 1 Chunks Type float64 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,200 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,137 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (432, 432) (432, 432) Count 137 Tasks 1 Chunks Type float64 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(432, 432)","(432, 432)"
Count,137 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,137 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (432, 432) (432, 432) Count 137 Tasks 1 Chunks Type float32 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,137 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [13]:
# Prepare the time coordinate: the 16th of the month of `dt`, at 12:00.
mid_stamp = '{:%Y-%m-16 12:00:00}'.format(dt)
mid_time = [np.datetime64(mid_stamp)]
# one-element DataArray indexed by itself along 'time'
mid_time_da = xr.DataArray(mid_time, [('time', mid_time)])

mid_time_da

In [14]:
# add the 'time' coordinate variable (and dimension) to the dataset object
dst_month = ds_month.expand_dims(time=mid_time_da)

# drop time dimension for the CRS (added by expand_dims)
crs_var = dst_month['Lambert_Azimuthal_Grid']
dst_month['Lambert_Azimuthal_Grid'] = crs_var.squeeze(dim='time', drop=True)

# Take over the attributes of the original 'time' variable, leaving out
#   'units' and 'calendar' (xarray re-introduces these as encoding, not
#   attributes, at time of writing to netCDF) and any key starting with '_'.
left_out = ('units', 'calendar')
time_attrs = {
    key: value
    for key, value in dso.time.attrs.items()
    if key not in left_out and not key.startswith('_')
}
dst_month.time.attrs = time_attrs
dst_month

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (432, 432) (432, 432) Count 150 Tasks 1 Chunks Type float32 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (432, 432) (432, 432) Count 150 Tasks 1 Chunks Type float32 numpy.ndarray",432  432,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(432, 432)","(432, 432)"
Count,150 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(1, 432, 432)","(1, 432, 432)"
Count,201 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (1, 432, 432) (1, 432, 432) Count 201 Tasks 1 Chunks Type float64 numpy.ndarray",432  432  1,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(1, 432, 432)","(1, 432, 432)"
Count,201 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(1, 432, 432)","(1, 432, 432)"
Count,138 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.42 MiB 1.42 MiB Shape (1, 432, 432) (1, 432, 432) Count 138 Tasks 1 Chunks Type float64 numpy.ndarray",432  432  1,

Unnamed: 0,Array,Chunk
Bytes,1.42 MiB,1.42 MiB
Shape,"(1, 432, 432)","(1, 432, 432)"
Count,138 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(1, 432, 432)","(1, 432, 432)"
Count,138 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 729.00 kiB 729.00 kiB Shape (1, 432, 432) (1, 432, 432) Count 138 Tasks 1 Chunks Type float32 numpy.ndarray",432  432  1,

Unnamed: 0,Array,Chunk
Bytes,729.00 kiB,729.00 kiB
Shape,"(1, 432, 432)","(1, 432, 432)"
Count,138 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [15]:
# unique, sorted list of the daily CDR sources that were used
srcs = sorted(set(srcs))

# monthly file name (note the format of the datestring: YYYYMM.nc)
# Kept under its own name: find_sic_files() reads the global `fn_patt` (the daily
#   template) at call time, so rebinding it here would break any later call.
monthly_fn_patt = 'ice_conc_{a:}_ease2-250_{c:}-v3p0_{d:%Y%m}.nc'
# The last entry of the sorted list names the file: alphabetical order gives
#   'cdr' < 'icdr' < 'icdrft', so 'icdrft' wins if present, then 'icdr', then 'cdr'.
fn = monthly_fn_patt.format(a=area, d=dt, c=srcs[-1])

# encoding: reuse dtype / scale_factor / _FillValue / time units of the original
#   daily file (`dso`), and disable _FillValue where none should be written.
encoding = {
    'time': {'dtype': dso['time'].dtype, 'units': dso['time'].units,
             'calendar': dso['time'].calendar, '_FillValue': None},
    'time_bnds': {'dtype': dso['time_bnds'].dtype, 'units': dso['time_bnds'].units,
                  '_FillValue': None},
    'ice_conc': {'dtype': dso['ice_conc'].dtype,
                 'scale_factor': dso['ice_conc'].scale_factor,
                 '_FillValue': dso['ice_conc']._FillValue},
    'raw_ice_conc_values': {'dtype': dso['raw_ice_conc_values'].dtype,
                            'scale_factor': dso['raw_ice_conc_values'].scale_factor,
                            '_FillValue': dso['raw_ice_conc_values']._FillValue},
    'status_flag': {'dtype': dso['status_flag'].dtype,
                    '_FillValue': dso['status_flag']._FillValue},
    'lat': {'_FillValue': None}, 'lon': {'_FillValue': None},
    'xc': {'_FillValue': None}, 'yc': {'_FillValue': None},
    'Lambert_Azimuthal_Grid': {'_FillValue': None},
}

# save the monthly xarray dataset to file (an existing file of that name is replaced)
if os.path.exists(fn):
    os.remove(fn)
dst_month.to_netcdf(fn, encoding=encoding)

print(fn + ' ready')

ice_conc_sh_ease2-250_cdr-v3p0_200501.nc ready
