In [1]:
'''

This code is part of the SIPN2 project focused on improving sub-seasonal to seasonal predictions of Arctic Sea Ice. 
If you use this code for a publication or presentation, please cite the reference in the README.md on the
main page (https://github.com/NicWayand/ESIO). 

Questions or comments should be addressed to nicway@uw.edu

Copyright (c) 2018 Nic Wayand

GNU General Public License v3.0


'''

%matplotlib inline
%load_ext autoreload
%autoreload
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import numpy.ma as ma
import struct
import os
import xarray as xr
import glob
import datetime 
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import seaborn as sns
import pandas as pd
import itertools
import calendar

# ESIO Imports

from esio import EsioData as ed
from esio import ice_plot
from esio import metrics

import dask
#dask.set_options(get=dask.threaded.get)
# from dask.distributed import Client, progress
# client = Client(processes=12)

# Seaborn look-and-feel applied to every figure in this notebook
sns.set_style('whitegrid')
sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 2.5})

# Set matplotlib's open-figure warning threshold to 0
plt.rcParams['figure.max_open_warning'] = 0

#############################################################
# Load in Data
#############################################################
# E is the project's EsioData object; the model paths and flags used by the
# cells below (E.model, E.icePredicted, E.model_dir) are read from it.
E = ed.EsioData.load()
data_dir, grid_dir = E.data_dir, E.grid_dir

In [2]:
# Replace dask's default scheduler with the threaded one
dask.config.set({'scheduler': 'threads'})

<dask.config.set at 0x7f09ccd2a6a0>

In [3]:
# Key used below to select each model's input folder and to name the output folder
runType = "reforecast"


In [4]:
# Models to process: every configured model except 'piomas', restricted to
# those flagged in E.icePredicted
models_2_plot = [
    model_name
    for model_name in E.model.keys()
    if model_name != 'piomas' and E.icePredicted[model_name]
]
models_2_plot

['gfdlsipn',
 'yopp',
 'ukmetofficesipn',
 'ecmwfsipn',
 'ecmwf',
 'metreofr',
 'ukmo',
 'kma',
 'ncep',
 'usnavysipn',
 'usnavyncep',
 'rasmesrl',
 'noaasipn',
 'noaasipn_ext',
 'modcansipns_3',
 'modcansipns_4',
 'uclsipn',
 'szapirosipn',
 'MME']

In [5]:
# Override the full model list: only this model is processed below
models_2_plot = ["ncep"]

In [6]:
# Convert netcdf file format into Contour Shifting format
# 
# years x months x lat x lon
#
# Where years is actually different initialization days
# and months is actually forecast lead time days

### WARNING! May need to change NSIDC data set depending on the model range used!

In [7]:
# Load in Observations (all yearly NSIDC files, concatenated along time)
obs_f = '/home/disk/sipn/nicway/data/obs/NSIDC_0051/sipn_nc_yearly/*.nc'
ds_obs = xr.open_mfdataset(obs_f, concat_dim='time', autoclose=True)

# Encode per the Contour Shifting (CS) requirements:
#   0-100 : sea ice concentration in percent
#   110   : grid box is within the satellite hole
#   120   : grid box is on land
sic_percent = ds_obs.sic * 100 # fraction to percent
outside_hole = ds_obs.hole_mask == 0

# Anything that is not <= 100 (NaN included) becomes 120; the hole is then
# blanked to NaN so the final step can stamp it with its own code.
obs_CS = sic_percent.where(sic_percent <= 100, other=120).where(outside_hole)
# NaN fails the <= 120 comparison, so the blanked hole cells become 110
obs_CS = obs_CS.where(obs_CS <= 120, other=110).drop('hole_mask')
obs_CS.name = 'conc'
obs_CS

<xarray.DataArray 'conc' (time: 12321, y: 448, x: 304)>
dask.array<shape=(12321, 448, 304), dtype=float64, chunksize=(182, 448, 304)>
Coordinates:
  * x        (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ...
  * y        (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ...
    lat      (x, y) float64 31.1 31.25 31.4 31.55 31.69 31.84 31.99 32.13 ...
    lon      (x, y) float64 168.3 168.4 168.5 168.7 168.8 168.9 169.0 169.1 ...
    xm       (x) int64 -3850000 -3825000 -3800000 -3775000 -3750000 -3725000 ...
    ym       (y) int64 5850000 5825000 5800000 5775000 5750000 5725000 ...
  * time     (time) datetime64[ns] 1979-01-02 1979-01-04 1979-01-06 ...

In [8]:
# Build the Contour Shifting (CS) inputs. For every model and every calendar
# year of initializations, one model file and one matching observation file
# are written with dims (lat, lon, months, years), where "years" indexes the
# initialization dates and "months" indexes the forecast lead times.

# The observed time axis does not change between models/years: read it once
obs_times = obs_CS.time.values

# For each model
for cmod in models_2_plot:
    print(cmod)

    # Load in Model
    all_files = os.path.join(E.model[cmod][runType]['sipn_nc'], '*.nc')
    # Check we have files. Sorted so the init times are concatenated in a
    # reproducible order (glob order is filesystem dependent); this matches
    # the sorted() used by the later reformatting cells.
    files = sorted(glob.glob(all_files))
    if not files:
        print("Skipping model", cmod, "no forecast files found.")
        continue # Skip this model
    ds_mod_all = xr.open_mfdataset(files, concat_dim='init_time', autoclose=True, parallel=True)

    # Loop over years
    for (cyear, ds_mod) in ds_mod_all.groupby('init_time.year'):
        print(cyear)

        # Adjust Model format
        # Want: (lat: 448, lon: 304, months: 12, years: 35)
        print("Taking mean of ensemble to correct...")
        mod_CS = ds_mod.mean(dim='ensemble').rename({'fore_time':'months','init_time':'years','lat':'latitude','lon':'longitude'}).sic
        mod_CS = mod_CS.rename({'nj':'lon','ni':'lat'})
        mod_CS.coords['lon'] = np.arange(1,mod_CS.lon.size+1,1)
        mod_CS.coords['lat'] = np.arange(1,mod_CS.lat.size+1,1)
        mod_CS.coords['valid_time'] = mod_CS.years + mod_CS.months
        print(mod_CS.dims)

        # Loop through each model "init_time/years", find observed time for each valid time
        obs_CS_list = []
        for it in mod_CS.years: # For each init time
            temp_list = []
            for ft in mod_CS.months: # For each forecast period
                valid = it + ft
                if valid.values in obs_times:
                    c_obs = obs_CS.sel(time=valid)
                    c_obs.coords['months'] = ft
                    temp_list.append(c_obs)
            if len(temp_list)>0: # If we found any obs for current forecast valid times
                da_temp = xr.concat(temp_list, dim='months')
                da_temp.coords['years'] = it
                obs_CS_list.append(da_temp)

        # xr.concat cannot combine an empty list, so a year without a single
        # matching observation is reported and skipped instead of crashing.
        if not obs_CS_list:
            print("Skipping year", cyear, "no observations found for any forecast valid time.")
            continue
        obs_CS_new = xr.concat(obs_CS_list, dim='years')
        print(obs_CS_new.dims)

        # Discard those forecasts (init_times/years) with any missing observations
        # TODO: allow these later once Contour shift can handle them
        OK_years = obs_CS_new.notnull().sum(dim=['x','y','months'])
        OK_years = OK_years==OK_years.max().values
        obs_CS_new = obs_CS_new.where(OK_years, drop=True)
        mod_CS = mod_CS.where(OK_years, drop=True)

        # Add valid_time so we remember what the times mean!
        obs_CS_new.coords['valid_time'] = obs_CS_new.years + obs_CS_new.months
        obs_CS_new = obs_CS_new.rename({'lat':'latitude','lon':'longitude'})
        obs_CS_new = obs_CS_new.rename({'x':'lon','y':'lat'})
        obs_CS_new.coords['lon'] = np.arange(1,obs_CS_new.lon.size+1,1)
        obs_CS_new.coords['lat'] = np.arange(1,obs_CS_new.lat.size+1,1)

        # Set coords years and months to simple indexes for R code
        obs_CS_new.coords['years'] = np.arange(1,obs_CS_new.years.size+1,1)
        mod_CS.coords['years'] = np.arange(1,obs_CS_new.years.size+1,1)
        obs_CS_new.coords['months'] = np.arange(1,obs_CS_new.months.size+1,1)
        mod_CS.coords['months'] = np.arange(1,obs_CS_new.months.size+1,1)

        # drop extra vars
        obs_CS_new = obs_CS_new.drop(['xm','ym','time'])
        obs_CS_new = obs_CS_new.to_dataset()
        mod_CS.name = 'iceInd'
        mod_CS = mod_CS.to_dataset()

        # Reshape
        obs_CS_new = obs_CS_new.transpose('lat','lon','months','years')
        mod_CS = mod_CS.transpose('lat','lon','months','years')

        # Make smaller by downgrading actual type
        obs_CS_new['conc'] = obs_CS_new.conc.astype('int16') # 0-120, so int16 is fine
        mod_CS['iceInd'] = mod_CS.iceInd.astype('float32') # 0-1 as a fraction, so float32 is fine

        # "flip" lat/y coord to match contour format
        # (only the data is reversed along axis 0; the coordinate values are reused as-is)
        print("Flipping...")
        obs_CS_new['conc'] = xr.DataArray(np.flip(obs_CS_new.conc.values, axis=0), dims = obs_CS_new.conc.dims, coords = obs_CS_new.conc.coords)
        mod_CS['iceInd'] = xr.DataArray(np.flip(mod_CS.iceInd.values, axis=0), dims = mod_CS.iceInd.dims, coords = mod_CS.iceInd.coords)

        # Write to netcdf
        out_dir = os.path.join(E.model_dir, cmod, runType, 'CS_daily')
        os.makedirs(out_dir, exist_ok=True) # no check-then-create race

        print("Writing to disk...")
        obs_CS_new.to_netcdf(os.path.join(out_dir, str(cyear)+'_Obs_daily.nc'))
        mod_CS.to_netcdf(os.path.join(out_dir, str(cyear)+'_Mod_daily.nc'))

        print("Finished year", cyear)

    # Every year has been written: release the forecast file handles
    ds_mod_all.close()
    print("Finished model", cmod)


ncep
1999
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 1999
2000
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2000
2001
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2001
2002
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2002
2003
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2003
2004
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2004
2005
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2005
2006
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2006
2007
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2007
2008
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2008
2009
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2009
2010
Taking mean of ensemble to correct...
('years', 'months', 'lon', 'lat')
('years', 'months', 'y', 'x')


  return func(*args2)
  return func(*args2)


Flipping...


  x = np.divide(x1, x2, out)


Writing to disk...




Finished year 2010
Finished model ncep


### Daily

In [9]:
# Now reformat into the original Contour Shifting format (one file per DOY initialization).
# Reads the per-calendar-year CS_daily files, stacks them along a new
# 'realYear' dimension, and writes one file per init-day index in which the
# 'years' dimension holds the real calendar years.
# For each model
for cmod in models_2_plot:
    print(cmod)

    for prefix in ['Mod','Obs']:
        print(prefix)
        # Load in
        all_files = os.path.join(E.model_dir, cmod, runType, 'CS_daily', '*'+prefix+'*.nc')
        # Check we have files
        files = glob.glob(all_files)
        if not files:
            continue # Nothing to reformat for this model/prefix

        # Create output dir
        out_dir = os.path.join(E.model_dir, cmod, runType, 'CS_yearly')
        os.makedirs(out_dir, exist_ok=True) # no check-then-create race

        # The context manager closes the input files even if the loop is
        # interrupted part way through.
        with xr.open_mfdataset(sorted(files),
                               chunks={'lat':448, 'lon': 304, 'months': 43, 'years': 1},
                               concat_dim='realYear', autoclose=True, parallel=True) as ds_mod_all:
            if prefix=='Obs':
                ds_mod_all['conc'] = ds_mod_all.conc.astype('int16')

            # Loop over each "year" (really the DOY for each year) and write to disk
            for cdoy in ds_mod_all.years.values:
                print(cdoy)
                ds_cdoy = ds_mod_all.sel(years=cdoy).drop('years')
                ds_cdoy = ds_cdoy.rename({'realYear':'years'})
                ds_cdoy.coords['years'] = np.arange(1,ds_cdoy.years.size+1)
                ds_cdoy = ds_cdoy.transpose('lat','lon','months','years')
                # TODO check for NaN?

                ds_cdoy.to_netcdf(os.path.join(out_dir, str(cdoy)+'_'+prefix+'_daily.nc'))


ncep
Mod
1




2
3
4
5
6
7
8
9
10
11
12


KeyboardInterrupt: 

### Weekly

In [None]:
# Now reformat into the Contour Shifting format, one file per initialization week.
# Reads the per-calendar-year CS_daily files, stacks them along a new
# 'realYear' dimension, averages the init days in blocks of 7, and writes one
# file per block in which the 'years' dimension holds the real calendar years.
# For each model
for cmod in models_2_plot:
    print(cmod)

    for prefix in ['Mod','Obs']:
        print(prefix)
        # Load in
        all_files = os.path.join(E.model_dir, cmod, runType, 'CS_daily', '*'+prefix+'*.nc')
        # Check we have files
        files = glob.glob(all_files)
        if not files:
            continue # Nothing to reformat for this model/prefix

        # Create output dir
        out_dir = os.path.join(E.model_dir, cmod, runType, 'CS_yearly_weekly_mean')
        os.makedirs(out_dir, exist_ok=True) # no check-then-create race

        # The context manager closes the input files even if the loop is
        # interrupted part way through.
        with xr.open_mfdataset(sorted(files),
                               chunks={'lat':448, 'lon': 304, 'months': 43, 'years': 1},
                               concat_dim='realYear', autoclose=True, parallel=True) as ds_mod_all:
            if prefix=='Obs':
                ds_mod_all['conc'] = ds_mod_all.conc.astype('int16')

            # Aggregate to weekly means.
            # The CS_daily files index their init days from 1 and groupby_bins
            # uses right-closed bins, so (0, 7] holds days 1-7, (7, 14] days
            # 8-14, ... The "+ 1" keeps the last edge when the number of days
            # is an exact multiple of 7; without it that final complete week
            # was dropped. A trailing partial week is still left out.
            y_bins = np.arange(0, ds_mod_all.years.size + 1, 7)
            y_bin_labels = y_bins[1:] # each week is labelled by its last day index
            ds_MON = ds_mod_all.groupby_bins('years', y_bins, labels=y_bin_labels).mean(dim='years')

            # Loop over each "year" (really the WOY (Week of year) for each year) and write to disk
            for cwoy in ds_MON.years_bins.values:
                print(cwoy)
                ds_cwoy = ds_MON.sel(years_bins=cwoy).drop('years_bins')
                ds_cwoy = ds_cwoy.rename({'realYear':'years'})
                ds_cwoy.coords['years'] = np.arange(1,ds_cwoy.years.size+1)
                ds_cwoy = ds_cwoy.transpose('lat','lon','months','years')
                # TODO check for NaN?

                ds_cwoy.to_netcdf(os.path.join(out_dir, str(cwoy)+'_'+prefix+'_weekly.nc'))


### Monthly

In [14]:
# Now reformat into the Contour Shifting format, one file per initialization month.
# Reads the per-calendar-year CS_daily files, stacks them along a new
# 'realYear' dimension, averages the init days by calendar month, and writes
# one file per month in which the 'years' dimension holds the real calendar years.
# For each model
for cmod in models_2_plot:
    print(cmod)

    for prefix in ['Mod','Obs']:
        print(prefix)
        # Load in
        all_files = os.path.join(E.model_dir, cmod, runType, 'CS_daily', '*'+prefix+'*.nc')
        # Check we have files
        files = glob.glob(all_files)
        if not files:
            continue # Nothing to reformat for this model/prefix

        # Create output dir
        out_dir = os.path.join(E.model_dir, cmod, runType, 'CS_yearly_monthly_mean')
        os.makedirs(out_dir, exist_ok=True) # no check-then-create race

        # The context manager closes the input files even if the loop is
        # interrupted part way through.
        with xr.open_mfdataset(sorted(files),
                               chunks={'lat':448, 'lon': 304, 'months': 43, 'years': 1},
                               concat_dim='realYear', autoclose=True, parallel=True) as ds_mod_all:
            if prefix=='Obs':
                ds_mod_all['conc'] = ds_mod_all.conc.astype('int16')

            # Aggregate to monthly means
            # 'years' is just an int index of the init day; swap it for
            # consecutive dates in a placeholder year so it can be grouped by month.
            # NOTE(review): 2000 is a leap year, so position 60 maps to Feb 29 -
            # confirm this is the intended day-index -> month mapping.
            ds_mod_all['years'] = pd.date_range(start='2000-01-01', periods=ds_mod_all.years.size, freq='D')
            # Group by month: 'years' is averaged away and replaced by a 'month' dimension
            ds_MON = ds_mod_all.groupby('years.month').mean(dim='years')
            # The real calendar years now take over the 'years' name, indexed 1..N
            ds_MON = ds_MON.rename({'realYear':'years'})
            ds_MON.coords['years'] = np.arange(1,ds_MON.years.size+1)

            # Loop over each init month
            for cmon in ds_MON.month.values: # each init month
                print(cmon)
                ds_cmon = ds_MON.sel(month=cmon)
                ds_cmon = ds_cmon.transpose('lat','lon','months','years')
                # TODO check for NaN?

                ds_cmon.to_netcdf(os.path.join(out_dir, str(cmon)+'_'+prefix+'_monthly.nc'))


ncep
Mod
1


  x = np.divide(x1, x2, out)


2
3
4
5
6
7
8
9
10
11
12
Obs
1




2
3
4
5
6
7
8
9
10
11
12
