# Generate Biweekly averages of the `lead` dimension

---

Creates biweekly averages along the `lead` dimension of the raw, anomaly, or climatology files.

At the bottom of the notebook we also compute biweekly averages for the verification data.

In [3]:
import cftime
import numpy as np
import xarray as xr
# Ask xarray to keep attributes on the results of operations (session-wide option).
xr.set_options(keep_attrs=True)
import climpred
from tqdm import tqdm
import dask.array as da
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator
import xskillscore as xs
import warnings
# Silence every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")

from dask.distributed import Client
import dask.config
# Turn off dask's "array.slicing.split_large_chunks" option for this session.
dask.config.set({"array.slicing.split_large_chunks": False})

<dask.config.set at 0x2ac4b9446910>

In [4]:
# Attach this notebook to a Dask scheduler at a fixed, hard-coded address.
scheduler_address = "tcp://10.12.206.54:34204"
client = Client(scheduler_address)

Choose your model and data type

In [78]:
# Forecast model to process: "ECMWF", "ECCC", or "NCEP".
# The value is used to build the input/output paths and to pick the lead windows.
model = "NCEP"
# Data type to process: "raw", "anom", or "climatology".
# The value is used to build the input/output paths and to pick the chunking layout.
data = "climatology"

In [79]:
# Daily store for the selected model / data type, loaded as single precision.
daily_store = f"/glade/campaign/mmm/c3we/jaye/S2S_zarr/{model}.{data}.daily.geospatial.zarr/"
hinda = xr.open_zarr(daily_store, consolidated=True).astype('float32')

Because each model has a different `lead` dimension size, the length of the `lead` coordinate is hard-coded per model below.

In [80]:
# Number of daily leads each model provides; `lead` is relabelled 0 .. n-1.
lead_counts = {
    "ECMWF": 46,
    "NCEP": 43,
    "ECCC": 32,
}
# Any other model name leaves the `lead` coordinate untouched.
if model in lead_counts:
    hinda["lead"] = range(0, lead_counts[model])

A function to calculate the biweekly averages.

In [81]:
def make_biweekly(hind, model_name=None):
    """Average daily leads into biweekly windows along the ``lead`` dimension.

    Parameters
    ----------
    hind : xarray object with a daily ``lead`` coordinate
        Must contain every lead day covered by the windows of the chosen model.
    model_name : str, optional
        One of "ECMWF", "NCEP" or "ECCC". Defaults to the notebook-level
        ``model`` variable, so existing ``make_biweekly(hind)`` calls still work.

    Returns
    -------
    The window means concatenated along ``lead``. Each entry is labelled with
    the first lead day of its window, and ``lead`` carries ``units = 'days'``.

    Raises
    ------
    ValueError
        If the model name is not one of the supported models.
    """
    if model_name is None:
        # Fall back to the model selected at the top of the notebook.
        model_name = model

    # (first lead day, number of days) of every window, per model.
    # The last window is shorter than 15 days for ECMWF (14) and NCEP (12);
    # ECCC has no third window.
    windows_by_model = {
        "ECMWF": [(1, 15), (16, 15), (31, 14)],
        "NCEP": [(1, 15), (16, 15), (31, 12)],
        "ECCC": [(1, 15), (16, 15)],
    }
    if model_name not in windows_by_model:
        # Previously an unknown model fell through every branch and failed
        # with an UnboundLocalError on `hind_bw`.
        raise ValueError(
            "Unsupported model %r; expected one of %s"
            % (model_name, sorted(windows_by_model))
        )
    windows = windows_by_model[model_name]

    # skipna=False: a window containing any missing day yields a missing mean.
    window_means = [
        hind.sel(lead=range(first_day, first_day + n_days)).mean(dim='lead', skipna=False)
        for first_day, n_days in windows
    ]
    hind_bw = xr.concat(window_means, dim='lead')

    # lead represents first day of biweekly aggregate. The second window
    # averages leads 16..30, so its label is 16 (it used to be mislabelled 15).
    # NOTE(review): anything downstream that selects lead=15 must switch to 16.
    hind_bw['lead'] = [first_day for first_day, _ in windows]
    hind_bw['lead'].attrs['units'] = 'days'
    return hind_bw

## Apply the function to make biweekly averages

In [82]:
# Collapse the daily leads of the loaded data into biweekly means.
hind_bw = make_biweekly(hinda)

Rechunk and write the data to a new `zarr` file!

In [83]:
# Climatology data is chunked along `dayofyear`; every other data type is
# chunked along `member` / `init`.
if data == "climatology":
    chunk_spec = {"dayofyear": 1, "lead": -1, "lat": 121, "lon": 240}
else:
    chunk_spec = {"member": -1, "init": 1, "lead": -1, "lat": 121, "lon": 240}
hind_bw = hind_bw.chunk(chunk_spec).persist()

if data == "anom":
    # `drop_vars` replaces the deprecated `drop`. errors="ignore" lets this
    # cell be re-run after `dayofyear` has already been removed.
    hind_bw = hind_bw.drop_vars("dayofyear", errors="ignore")

In [85]:
%time hind_bw.to_zarr("/glade/campaign/mmm/c3we/jaye/S2S_zarr/"+model+"."+data+".biweekly.geospatial.zarr/",mode="w",consolidated=True)

CPU times: user 55.5 ms, sys: 8.14 ms, total: 63.7 ms
Wall time: 1.96 s


<xarray.backends.zarr.ZarrStore at 0x2b2c1f077f40>

# Make biweekly averages for verification data as well!

It's a different enough process that we want to do it separately.

In [5]:
# Daily observed anomalies used for verification, loaded as single precision.
verif_daily_store = "/glade/campaign/mmm/c3we/jaye/S2S_zarr/OBS.anom.daily.geospatial.zarr/"
verif = xr.open_zarr(verif_daily_store, consolidated=True).astype('float32')

In [6]:
# Display the dataset summary to inspect its variables, shapes and chunking.
verif

Unnamed: 0,Array,Chunk
Bytes,62.78 kiB,62.78 kiB
Shape,"(8036,)","(8036,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 62.78 kiB 62.78 kiB Shape (8036,) (8036,) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",8036  1,

Unnamed: 0,Array,Chunk
Bytes,62.78 kiB,62.78 kiB
Shape,"(8036,)","(8036,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,890.22 MiB,63.92 MiB
Shape,"(8036, 121, 240)","(577, 121, 240)"
Count,29 Tasks,14 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 890.22 MiB 63.92 MiB Shape (8036, 121, 240) (577, 121, 240) Count 29 Tasks 14 Chunks Type float32 numpy.ndarray",240  121  8036,

Unnamed: 0,Array,Chunk
Bytes,890.22 MiB,63.92 MiB
Shape,"(8036, 121, 240)","(577, 121, 240)"
Count,29 Tasks,14 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,890.22 MiB,63.59 MiB
Shape,"(8036, 121, 240)","(574, 121, 240)"
Count,29 Tasks,14 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 890.22 MiB 63.59 MiB Shape (8036, 121, 240) (574, 121, 240) Count 29 Tasks 14 Chunks Type float32 numpy.ndarray",240  121  8036,

Unnamed: 0,Array,Chunk
Bytes,890.22 MiB,63.59 MiB
Shape,"(8036, 121, 240)","(574, 121, 240)"
Count,29 Tasks,14 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,890.22 MiB,63.59 MiB
Shape,"(8036, 121, 240)","(574, 121, 240)"
Count,29 Tasks,14 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 890.22 MiB 63.59 MiB Shape (8036, 121, 240) (574, 121, 240) Count 29 Tasks 14 Chunks Type float32 numpy.ndarray",240  121  8036,

Unnamed: 0,Array,Chunk
Bytes,890.22 MiB,63.59 MiB
Shape,"(8036, 121, 240)","(574, 121, 240)"
Count,29 Tasks,14 Chunks
Type,float32,numpy.ndarray


In [7]:
verif_biweekly = verif.rolling(time=15, center=False).mean()
verif_biweekly = verif_biweekly.isel(time=slice(14, None)).assign_coords(time=verif.time.isel(time=slice(None, -14)))
%time verif_biweekly = verif_biweekly.chunk({'time':-1,'lon': 240, 'lat': 121}).persist()

CPU times: user 47.8 ms, sys: 1.05 ms, total: 48.9 ms
Wall time: 68 ms


In [9]:
# `drop_vars` replaces the deprecated `drop`. errors="ignore" lets this cell
# be re-run after `dayofyear` has already been removed.
verif_biweekly = verif_biweekly.drop_vars("dayofyear", errors="ignore")
verif_biweekly

Unnamed: 0,Array,Chunk
Bytes,1.74 GiB,1.74 GiB
Shape,"(8022, 121, 240)","(8022, 121, 240)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.74 GiB 1.74 GiB Shape (8022, 121, 240) (8022, 121, 240) Count 1 Tasks 1 Chunks Type float64 numpy.ndarray",240  121  8022,

Unnamed: 0,Array,Chunk
Bytes,1.74 GiB,1.74 GiB
Shape,"(8022, 121, 240)","(8022, 121, 240)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.74 GiB,1.74 GiB
Shape,"(8022, 121, 240)","(8022, 121, 240)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.74 GiB 1.74 GiB Shape (8022, 121, 240) (8022, 121, 240) Count 1 Tasks 1 Chunks Type float64 numpy.ndarray",240  121  8022,

Unnamed: 0,Array,Chunk
Bytes,1.74 GiB,1.74 GiB
Shape,"(8022, 121, 240)","(8022, 121, 240)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.74 GiB,1.74 GiB
Shape,"(8022, 121, 240)","(8022, 121, 240)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.74 GiB 1.74 GiB Shape (8022, 121, 240) (8022, 121, 240) Count 1 Tasks 1 Chunks Type float64 numpy.ndarray",240  121  8022,

Unnamed: 0,Array,Chunk
Bytes,1.74 GiB,1.74 GiB
Shape,"(8022, 121, 240)","(8022, 121, 240)"
Count,1 Tasks,1 Chunks
Type,float64,numpy.ndarray


In [22]:
%time verif_biweekly.to_zarr("/glade/campaign/mmm/c3we/jaye/S2S_zarr/OBS.anom.biweekly.geospatial.zarr/",mode="w",consolidated=True)

CPU times: user 11.3 ms, sys: 4.69 ms, total: 16 ms
Wall time: 229 ms


<xarray.backends.zarr.ZarrStore at 0x2b150811d760>