In [1]:
'''

This code is part of the SIPN2 project focused on improving sub-seasonal to seasonal predictions of Arctic Sea Ice. 
If you use this code for a publication or presentation, please cite the reference in the README.md on the
main page (https://github.com/NicWayand/ESIO). 

Questions or comments should be addressed to nicway@uw.edu

Copyright (c) 2018 Nic Wayand

GNU General Public License v3.0


'''

%matplotlib inline
%load_ext autoreload
%autoreload
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import numpy.ma as ma
import struct
import os
import xarray as xr
import glob
import datetime 
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import seaborn as sns
import pandas as pd
import itertools
import calendar

# ESIO Imports

from esio import EsioData as ed
from esio import ice_plot
from esio import metrics

import dask
#dask.set_options(get=dask.threaded.get)
# from dask.distributed import Client, progress
# client = Client(processes=12)

# ---- Plot appearance ----------------------------------------------------
# Seaborn white-grid style, "talk" context with 1.5x fonts and 2.5pt lines.
sns.set_style('whitegrid')
sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 2.5})

# Threshold 0 for matplotlib's open-figure warning
# (presumably to silence it when many figures are created).
plt.rcParams['figure.max_open_warning'] = 0

# ---- ESIO project configuration -----------------------------------------
# E carries the project's directory layout; keep the two roots handy.
E = ed.EsioData.load()
data_dir, grid_dir = E.data_dir, E.grid_dir

In [3]:
# Replace dask's default scheduler with the threaded one.
dask.config.set({'scheduler': 'threads'})

<dask.config.set at 0x7f9f14ac21d0>

## Model

In [None]:
# Temporary target: glob of the NCEP 2017 forecast files.
mod_f = '/home/disk/sipn/nicway/data/model/ncep/forecast/sipn_nc/ncep_2017*.nc'
# Open every matching file as one dataset, stacked along 'init_time'.
ds_mod = xr.open_mfdataset(paths=mod_f, concat_dim='init_time')
ds_mod

In [None]:
# Want: (lat: 448, lon: 304, months: 12, years: 35)
ce = 0  # index of the ensemble member to extract
mod_CS = (
    ds_mod
    .isel(ensemble=ce)
    # First move the existing 'lat'/'lon' variables out of the way and give the
    # time dimensions the names used by the target layout ...
    .rename({'fore_time': 'months', 'init_time': 'years', 'lat': 'latitude', 'lon': 'longitude'})
    .sic
    # ... then reuse 'lon'/'lat' as the names of the grid index dimensions.
    .rename({'nj': 'lon', 'ni': 'lat'})
)
# Label the grid dimensions with 1-based integer indices.
mod_CS = mod_CS.assign_coords(
    lon=np.arange(1, mod_CS.sizes['lon'] + 1),
    lat=np.arange(1, mod_CS.sizes['lat'] + 1),
)
mod_CS.name = 'iceInd'
mod_CS

## Obs

In [None]:
# Temporary target: glob of the yearly NSIDC_0081 observation files.
obs_f = '/home/disk/sipn/nicway/data/obs/NSIDC_0081/sipn_nc_yearly/*.nc'
# Open every matching file as one dataset, stacked along 'time'.
ds_obs = xr.open_mfdataset(paths=obs_f, concat_dim='time', autoclose=True)
ds_obs

In [None]:
# Encode observations the way Contour Shifting (CS) requires:
#   0..100 -> sea ice concentration in percent
#   110    -> grid box lies within the satellite hole
#   120    -> grid box is on land
sic_percent = ds_obs.sic * 100  # fraction to percent

# Step 1: everything that is not <= 100 (NaN included) becomes 120.
land_flagged = sic_percent.where(sic_percent <= 100, other=120)
# Step 2: blank out (NaN) every cell where hole_mask is non-zero.
hole_blanked = land_flagged.where(ds_obs.hole_mask == 0)
# Step 3: the only values failing `<= 120` are the NaNs from step 2 -> 110.
obs_CS = hole_blanked.where(hole_blanked <= 120, other=110).drop('hole_mask')

# Quick look at the flag range (100-120) for the first time step.
obs_CS.isel(time=0).plot(vmin=100, vmax=120)
obs_CS.name = 'conc'
obs_CS

In [None]:
# Format time to match model 

In [None]:
# valid_time = init time ('years') + forecast lead ('months'), kept as a coordinate.
mod_CS = mod_CS.assign_coords(valid_time=mod_CS['years'] + mod_CS['months'])
mod_CS

In [None]:
# Spot check: does the first model valid time coincide with the third observed time?
mod_CS.valid_time.isel(years=0, months=0).values == obs_CS.time.isel(time=2).values

In [None]:
# Re-index the observations like the forecasts: for every model init time
# ("years") and forecast lead ("months"), pick the observed field at the valid
# time (init + lead) when that time exists in the observational record.
obs_times = obs_CS.time.values  # loop-invariant, so look it up once
obs_CS_list = []
for it in mod_CS.years: # For each init time
    temp_list = []
    for ft in mod_CS.months: # For each forecast period
        vt = it + ft  # valid time of this forecast step
        if vt.values in obs_times:
            c_obs = obs_CS.sel(time=vt)
            c_obs.coords['months'] = ft
            temp_list.append(c_obs)
    if temp_list: # If we found any obs for current forecast valid times
        da_temp = xr.concat(temp_list, dim='months')
        da_temp.coords['years'] = it
        obs_CS_list.append(da_temp)

# Fail with an explicit message instead of an opaque concat error when the
# model and observation periods do not overlap at all.
if not obs_CS_list:
    raise ValueError('No observations found for any model valid time '
                     '(init time + forecast lead); check that the model and '
                     'observation periods overlap.')
obs_CS_new = xr.concat(obs_CS_list, dim='years')

In [None]:
# Display the observations after re-indexing by 'years' (init time) and 'months' (lead).
obs_CS_new

In [None]:
# Keep only forecasts (init_times/years) whose observations are complete.
# TODO: allow incomplete ones later, once Contour shift can handle them.
# Count the non-missing observed values per init time ...
n_valid = obs_CS_new.notnull().sum(dim=['x','y','months'])
# ... and flag the init times that reach the largest count found.
OK_years = n_valid == n_valid.max().values
obs_CS_new = obs_CS_new.where(OK_years, drop=True)
mod_CS = mod_CS.where(OK_years, drop=True)

In [None]:
# Keep valid_time (init time + lead) as a coordinate so the meaning of the
# 'years'/'months' axes is not lost.
obs_CS_new = obs_CS_new.assign_coords(valid_time=obs_CS_new['years'] + obs_CS_new['months'])

In [None]:
# Same naming as the model: existing 'lat'/'lon' become 'latitude'/'longitude',
# then the grid dimensions 'x'/'y' take over the names 'lon'/'lat'.
obs_CS_new = (
    obs_CS_new
    .rename({'lat': 'latitude', 'lon': 'longitude'})
    .rename({'x': 'lon', 'y': 'lat'})
)
# Label the grid dimensions with 1-based integer indices.
obs_CS_new = obs_CS_new.assign_coords(
    lon=np.arange(1, obs_CS_new.sizes['lon'] + 1),
    lat=np.arange(1, obs_CS_new.sizes['lat'] + 1),
)
obs_CS_new

In [None]:
# Replace the 'years' and 'months' coordinates with simple 1-based indices for the R code.
# Both index vectors are sized from the observations and applied to obs and model alike.
year_index = np.arange(1, obs_CS_new.years.size + 1)
month_index = np.arange(1, obs_CS_new.months.size + 1)
obs_CS_new = obs_CS_new.assign_coords(years=year_index, months=month_index)
mod_CS = mod_CS.assign_coords(years=year_index, months=month_index)

In [None]:
# Display the model array after the 'years'/'months' coordinates were replaced by indices.
mod_CS

In [None]:
# Drop coordinates that are not needed in the output.
# 'latitude', 'longitude' and 'valid_time' are kept on purpose
# (an earlier variant of this cell dropped them as well).
unused_coords = ['xm','ym','time']
obs_CS_new = obs_CS_new.drop(unused_coords)


In [None]:
# mod_CS = mod_CS.drop(['latitude','longitude','valid_time'])

In [None]:
# Display the observations after the extra coordinates were dropped.
obs_CS_new

In [None]:
# Order
# Convert to a Dataset ahead of the dimension re-ordering (transpose) step.
obs_CS_new = obs_CS_new.to_dataset()


In [None]:
# Wrap the model array in a Dataset whose single data variable is called 'iceInd'.
mod_CS = mod_CS.to_dataset(name='iceInd')

In [None]:
# Target dimension layout: (lat, lon, months, years).
dim_order = ('lat', 'lon', 'months', 'years')
obs_CS_new = obs_CS_new.transpose(*dim_order)
mod_CS = mod_CS.transpose(*dim_order)

In [None]:
# "Flip" the lat/y axis: reverse the row order of the data while keeping the
# 'lat' index labels in their original order.
#
# The reversal is applied to the whole Dataset with isel(), so every variable
# that has the 'lat' dimension is reversed together with the data. Flipping
# only the data variable through `.values` left such coordinates (presumably
# the 2-D 'latitude'/'longitude' arrays -- confirm they carry the 'lat'
# dimension) pointing at the wrong rows. It also loaded the full arrays into
# memory and discarded the variable attributes.
# NOTE(review): confirm the R code does not rely on the previously unflipped
# 'latitude'/'longitude' arrays.
obs_CS_new = (
    obs_CS_new
    .isel(lat=slice(None, None, -1))
    .assign_coords(lat=obs_CS_new.lat.values)  # restore original index labels
)
mod_CS = (
    mod_CS
    .isel(lat=slice(None, None, -1))
    .assign_coords(lat=mod_CS.lat.values)  # restore original index labels
)

In [None]:
# "flip" lat/y coordinate
# obs_CS_new.coords['lat'] = np.flip(obs_CS_new.lat.values, axis=0)
# mod_CS.coords['lat'] = np.flip(mod_CS.lat.values, axis=0)

In [None]:
# Shrink the arrays by storing them in narrower dtypes.
obs_CS_new['conc'] = obs_CS_new['conc'].astype(np.int16)   # values span 0-120, int16 is enough
mod_CS['iceInd'] = mod_CS['iceInd'].astype(np.float32)     # fraction 0-1, float32 is enough

In [None]:
# Make it smaller on disk (15GB to 5 GB!)
# obs_CS_new.conc.encoding = {'dtype': 'int16', 'scale_factor': 0.1, '_FillValue': -9999}
# mod_CS.iceInd.encoding = {'dtype': 'int16', 'scale_factor': 0.1, '_FillValue': -9999}

In [None]:
# Write the observation and model test files for the R package.
out_dir = '/home/disk/sipn/nicway/data/Director_data/RPackage/SIPN'
for dataset_out, file_name in ((obs_CS_new, 'Obs_test.nc'), (mod_CS, 'Mod_test.nc')):
    dataset_out.to_netcdf(os.path.join(out_dir, file_name))

In [None]:
# os.chdir('/home/disk/sipn/nicway/test')

# # Create test dataset
# ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])})
# print(ds.dims)
# ds.to_netcdf('orig.nc')
# print(xr.open_dataset('orig.nc').dims)

# ds.transpose('y', 'z', 'x').to_netcdf('new.nc')
# print(xr.open_dataset('new.nc').dims)





# # Test DataArray
# da = xr.DataArray( [[[42]]], dims=('x', 'y', 'z'), coords={'x':[1], 'y':[1], 'z':[1]}, name='test')
# print(da)

# da.to_netcdf('da_orig.nc')
# print(xr.open_dataset('da_orig.nc'))

# da_t = da.transpose('y', 'z', 'x')
# print(da_t)
# da_t.to_netcdf('da_new.nc')
# print(xr.open_dataset('da_new.nc'))

# da.coords