## Compute ivtx and ivty netcdf for AR detection algorithm
**Author: Deanna Nash**

This notebook computes the zonal (ivtx) and meridional (ivty) components of vapour transport from the ECMWF QUV.grb files and writes them to a netCDF file formatted for use with the Guan and Waliser AR detection algorithm.

In [1]:
%matplotlib inline

import sys
from netCDF4 import Dataset
import netCDF4 as nc
import cftime
from datetime import datetime, timedelta
from netCDF4 import num2date, date2num
import time as time2
import numpy as np
import pandas as pd
import xarray as xr
import eofs
from eofs.standard import Eof
import glob

# you need intake-esm V 2020.11.4 and intake V 0.6.0

# import tensorflow as tf

import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
from shapely.geometry.polygon import LinearRing


import matplotlib as mpl
import matplotlib.pyplot as plt
from netCDF4 import Dataset
from matplotlib import cm

import copy
import fsspec
import intake



In [2]:
# Directory the IVT netCDF output is written to.
path_to_data = "/glade/scratch/dlnash/data/ECMWF/"
# Directory the QUV_*.grb input files are read from.
aneesh_data = "/glade/scratch/acsubram/S2S_Database/"

## Compute ivtx and ivty for selected initialization date

In [7]:
import re


# Gravitational acceleration (m s-2) dividing the pressure-weighted transport.
GRAVITY = 9.8

# Pressure layers as (top hPa, bottom hPa), bounded by the levels in the file:
# 1000 925 850 700 500 300 200
PRESSURE_LAYERS_HPA = [
    (200, 300),
    (300, 500),
    (500, 700),
    (700, 850),
    (850, 925),
    (925, 1000),
]


def integrate_vapor_transport(q, wind, layers=PRESSURE_LAYERS_HPA,
                              level_dim='isobaricInhPa'):
    """Sum layer-by-layer moisture transport over the given pressure layers.

    For every layer, ``q`` and ``wind`` are each averaged over the two
    bounding pressure levels, the two averages are multiplied, and the
    product is weighted by the layer thickness in Pa divided by ``GRAVITY``.
    The per-layer contributions are then added together.

    Parameters
    ----------
    q : xarray.DataArray
        Specific humidity with a ``level_dim`` dimension.
    wind : xarray.DataArray
        One wind component (``u`` or ``v``) on the same grid as ``q``.
    layers : sequence of (top, bottom) pairs
        Bounding pressure levels of each layer, in hPa.
    level_dim : str
        Name of the pressure dimension. It is reduced by name so the result
        does not depend on the order in which cfgrib returns the dimensions.

    Returns
    -------
    xarray.DataArray
        Transport summed over all layers; ``level_dim`` is removed.
    """
    total = None
    for top, bottom in layers:
        bounds = [top, bottom]
        q_layer = q.sel({level_dim: bounds}).mean(dim=level_dim)
        wind_layer = wind.sel({level_dim: bounds}).mean(dim=level_dim)
        thickness_pa = (bottom - top) * 100  # hPa -> Pa
        contribution = q_layer * wind_layer * thickness_pa / GRAVITY
        total = contribution if total is None else total + contribution
    return total


# NOTE(review): the original comment called this the control run, but an
# ensemble-member coordinate (`number`) is read from the file below -- confirm
# which forecast this file actually holds.
ds = aneesh_data + "QUV_20170119.grb"
dsopen = xr.open_dataset(ds, engine='cfgrib')
u = dsopen['u']
v = dsopen['v']
q = dsopen['q']
time_ctl = dsopen['time']
lon_ctl = dsopen['longitude']
lat_ctl = dsopen['latitude']
ens_ctl = dsopen['number']
step_ctl = dsopen['step']
prs_ctl = dsopen['isobaricInhPa']

# zonal and meridional horizontal vapour transport
qu = integrate_vapor_transport(q, u)
qv = integrate_vapor_transport(q, v)

tmp = xr.Dataset({'ivtx': qu,
                  'ivty': qv})


#     tmp['lev'] = ('lev', [1])
#     tmp = tmp.set_coords('lev')



In [9]:
## Hack (original author's note): encode the times by hand so they aren't in
## a gregorian calendar, which the detection algorithm did not like.
units = "days since 1900-01-01 00"
cal = 'standard'

# initialization date, encoded as a number in `units`
t1 = pd.date_range(start='2017-01-19', end='2017-01-19', freq='1D')
time1 = nc.date2num(t1.tolist(), units, calendar=cal)

# daily valid times of the forecast, encoded the same way
times_lst = pd.date_range(start='2017-01-19', end='2017-03-06', freq='1D')
dates = times_lst.tolist()
timez = nc.date2num(dates, units, calendar=cal)

In [10]:
# dns = xr.Dataset(
# {
#     "ivtx": (['ens', 'time', 'lat', 'lon'], tmp.ivtx.values),
#     "ivty": (['ens', 'time', 'lat', 'lon'], tmp.ivty.values)
# },
# coords={
#     "ens":  tmp.number.values, # ensemble number
#     "time":  timez, # days since initialization date
#     "lat":tmp.latitude.values,
#     "lon":tmp.longitude.values,
# },)

# # dimensions:
# #         lon = 240 ;
# #         lat = 121 ;
# #         lev = 47 ; # change to step
# #         time = UNLIMITED ; // (1 currently)
# #         ens = 11 ; # number
    
    
# dns = dns.expand_dims(dim={"lev":1}) # initialization date
# dns['lev'] = ('lev', [1])
# dns = dns.set_coords('lev')
# dns

In [11]:
# Rebuild the IVT fields as a plain dataset using the dimension names the AR
# detection algorithm expects:
#   ens  <- number (ensemble member)
#   lev  <- step   (index of the forecast day since initialization)
#   lat  <- latitude
#   lon  <- longitude
#   time <- initialization date (added below as the unlimited dimension)
#
# The raw arrays lose their dimension names once `.values` is taken, so the
# source order is pinned explicitly; a missing or unexpected dimension then
# raises instead of being silently mislabelled.
source_dims = ('number', 'step', 'latitude', 'longitude')
target_dims = ['ens', 'lev', 'lat', 'lon']

dns = xr.Dataset(
    {
        "ivtx": (target_dims, tmp.ivtx.transpose(*source_dims).values),
        "ivty": (target_dims, tmp.ivty.transpose(*source_dims).values),
    },
    coords={
        "ens": tmp.number.values,  # ensemble number
        "lev": np.arange(len(tmp.valid_time.values)),  # days since initialization date
        "lat": tmp.latitude.values,
        "lon": tmp.longitude.values,
    },
)

# add a length-1 time dimension holding the encoded initialization date
dns = dns.expand_dims(dim={"time": 1})
dns = dns.assign_coords(time=time1)

# describe how the numeric time value is encoded
dns['time'].attrs = dict(
    units="days since 1900-01-01 00",
    calendar='standard',
)

print(dns.time.attrs)

# reorder dimensions
dns = dns.transpose('ens', 'time', 'lev', 'lat', 'lon')
dns

<xarray.Dataset>
Dimensions:  (ens: 50, lat: 121, lev: 47, lon: 240, time: 1)
Coordinates:
  * ens      (ens) int64 1 2 3 4 5 6 7 8 9 10 ... 41 42 43 44 45 46 47 48 49 50
  * lev      (lev) int64 0 1 2 3 4 5 6 7 8 9 ... 37 38 39 40 41 42 43 44 45 46
  * lat      (lat) float64 90.0 88.5 87.0 85.5 84.0 ... -85.5 -87.0 -88.5 -90.0
  * lon      (lon) float64 0.0 1.5 3.0 4.5 6.0 ... 352.5 354.0 355.5 357.0 358.5
  * time     (time) float64 4.275e+04
Data variables:
    ivtx     (time, ens, lev, lat, lon) float32 -26.485043 ... 7.4784155
    ivty     (time, ens, lev, lat, lon) float32 -2.9125767 ... 0.4839363

In [15]:
# Save the dataset as netCDF, declaring `time` as the unlimited dimension.
ivt_nc_path = path_to_data+"IVT_20170119.nc"
dns.to_netcdf(ivt_nc_path, unlimited_dims=['time'])