# BRAN2020 daily ARD

Date: 24 April, 2024

Author = {"name": "Thomas Moore", "affiliation": "CSIRO", "email": "thomas.moore@csiro.au", "orcid": "0000-0003-3930-1946"}

### BRAN2020 is on the order of 50TB of `float32` data spread over nearly 9000 `netcdf` file assets in total.

#### required packages

In [1]:
import datetime

import dask
import intake
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr
from dask.distributed import Client, LocalCluster
from matplotlib import pyplot as plt

#### start a local Dask client

In [13]:
# Start a local Dask cluster made of 7 worker processes with one thread each.
cluster=LocalCluster(n_workers=7,processes=True,threads_per_worker=1)
# Attach this notebook session to the cluster.
client = Client(cluster)
# Show the client summary (dashboard link, workers, memory).
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 7
Total threads: 7,Total memory: 251.18 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:35777,Workers: 7
Dashboard: /proxy/8787/status,Total threads: 7
Started: Just now,Total memory: 251.18 GiB

0,1
Comm: tcp://127.0.0.1:46823,Total threads: 1
Dashboard: /proxy/38385/status,Memory: 35.88 GiB
Nanny: tcp://127.0.0.1:32909,
Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-2jk3cuun,Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-2jk3cuun

0,1
Comm: tcp://127.0.0.1:35247,Total threads: 1
Dashboard: /proxy/35649/status,Memory: 35.88 GiB
Nanny: tcp://127.0.0.1:42707,
Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-tuntmpmx,Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-tuntmpmx

0,1
Comm: tcp://127.0.0.1:42457,Total threads: 1
Dashboard: /proxy/38165/status,Memory: 35.88 GiB
Nanny: tcp://127.0.0.1:35389,
Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-kbnxbd4e,Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-kbnxbd4e

0,1
Comm: tcp://127.0.0.1:37895,Total threads: 1
Dashboard: /proxy/40341/status,Memory: 35.88 GiB
Nanny: tcp://127.0.0.1:43653,
Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-ay_djvip,Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-ay_djvip

0,1
Comm: tcp://127.0.0.1:38159,Total threads: 1
Dashboard: /proxy/34031/status,Memory: 35.88 GiB
Nanny: tcp://127.0.0.1:44289,
Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-utcc1bu9,Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-utcc1bu9

0,1
Comm: tcp://127.0.0.1:34449,Total threads: 1
Dashboard: /proxy/42301/status,Memory: 35.88 GiB
Nanny: tcp://127.0.0.1:36163,
Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-zxlbnkms,Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-zxlbnkms

0,1
Comm: tcp://127.0.0.1:34233,Total threads: 1
Dashboard: /proxy/43871/status,Memory: 35.88 GiB
Nanny: tcp://127.0.0.1:42521,
Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-2wjjqfqx,Local directory: /jobfs/114745531.gadi-pbs/dask-scratch-space/worker-2wjjqfqx


# ARD workflow part 1

### read paths from config file

#### [ you will need to specify the correct path to your `data-catalogue/config.ini` file ]

In [2]:
import configparser

#########
#### you will need to specify the correct path to your `data-catalogue/config.ini` file
#########
config_file = './code/BRAN2020-intake-catalog/config.ini'

# Create a ConfigParser object
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of
# letting config.get() raise a confusing NoSectionError further down.
if not config.read(config_file):
    raise FileNotFoundError(f"Could not read config file: {config_file!r}")

# Directory that holds the intake catalog files ([paths] section of the config)
catalog_path = config.get('paths', 'catalog_path')

In [3]:
catalog_path

'/g/data/v14/tm4888/code/BRAN2020-intake-catalog/catalogs/'

In [4]:
# Open the BRAN2020 intake-esm datastore from its JSON description.
# `columns_with_iterables` marks the `variable` column as holding lists
# (entries display as e.g. `[temp]` in the catalog dataframe).
BRAN2020_catalog = intake.open_esm_datastore(catalog_path+'BRAN2020.json',columns_with_iterables=['variable'])

In [5]:
BRAN2020_catalog

Unnamed: 0,unique
source,1
domain,4
time_period,4
variable,142
path,8949
derived_variable,0


In [6]:
BRAN2020_catalog.unique()['source']

['BRAN2020']

In [7]:
BRAN2020_catalog.unique()['domain']

['atm', 'ice', 'ocean', 'grid']

In [8]:
BRAN2020_catalog.unique()['time_period']

['annual', 'daily', 'month', 'static']

In [9]:
# Alphabetically ordered table of every variable name found in the catalog
var_list = sorted(BRAN2020_catalog.unique()['variable'])
df = pd.DataFrame(var_list, columns=['BRAN2020 Variables'])

# Temporarily lift pandas' row/column display limits so the whole table prints
full_table_options = (
    'display.max_rows', None,
    'display.max_columns', None,
    'display.precision', 3,
)
with pd.option_context(*full_table_options):
    print(df)

        BRAN2020 Variables
0                  angle_C
1                  angle_E
2                  angle_N
3                  angle_T
4                   area_C
5                   area_E
6                   area_N
7                   area_T
8                    bmf_u
9                    bmf_v
10                 depth_t
11              ds_00_01_C
12              ds_00_01_E
13              ds_00_01_N
14              ds_00_01_T
15              ds_00_02_C
16              ds_00_02_E
17              ds_00_02_N
18              ds_00_02_T
19              ds_00_10_C
20              ds_00_10_E
21              ds_00_10_N
22              ds_00_10_T
23              ds_00_20_C
24              ds_00_20_E
25              ds_00_20_N
26              ds_00_20_T
27              ds_01_02_C
28              ds_01_02_E
29              ds_01_02_N
30              ds_01_02_T
31              ds_01_11_C
32              ds_01_11_E
33              ds_01_11_N
34              ds_01_11_T
35              ds_01_21_C
3

## search for all daily `temp` data

In [10]:
# Narrow the catalog to the daily-frequency files that contain `temp`.
search = BRAN2020_catalog.search(variable=['temp'],time_period=['daily'])

In [11]:
search.df['path'][360]

'/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_2023_01.nc'

(base) tm4888@gadi-login-03 ~ du -hs /g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_2023_01.nc
5.0G	/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_2023_01.nc

short temp(Time, st_ocean, yt_ocean, xt_ocean) ;
		temp:long_name = "Potential temperature" ;
		temp:units = "degrees C" ;
		temp:valid_range = -32767s, 32767s ;
		temp:missing_value = -32768s ;
		temp:_FillValue = -32768s ;
		temp:packing = 4 ;
		temp:scale_factor = 0.00778222f ;
		temp:add_offset = 245.f ;
		temp:cell_methods = "time: mean Time: mean" ;
		temp:time_avg_info = "average_T1,average_T2,average_DT" ;
		temp:coordinates = "geolon_t geolat_t" ;
		temp:standard_name = "sea_water_potential_temperature" ;
		
		temp:_Storage = "chunked" ;
		temp:_ChunkSizes = 1, 1, 300, 300 ;

In [12]:
search.df

Unnamed: 0,source,domain,time_period,variable,path
0,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_199...
1,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_199...
2,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_199...
3,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_199...
4,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_199...
...,...,...,...,...,...
361,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_202...
362,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_202...
363,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_202...
364,BRAN2020,ocean,daily,[temp],/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_202...


### Out of some 9000 files we have now narrowed it down to just the 366 that have the information we need

### We can now load all that data into a single 11TB "lazy" `xarray` object for further reduction and analysis

In [21]:
%%time
# Dask chunk sizes passed to xarray when opening the files. These match the
# netCDF on-disk chunking reported in the header dump (_ChunkSizes = 1, 1, 300, 300).
xarray_open_kwargs = {"chunks": {"Time": 1, "st_ocean": 1, "xt_ocean": 300, "yt_ocean": 300}}
# Alternative, much larger chunking kept for reference (not used):
#xarray_open_kwargs = {"chunks": {"Time": 27, "xt_ocean": 3600, "yt_ocean": 1500}}
# Build one lazy dataset from all files matched by `search`.
DS=search.to_dask(xarray_open_kwargs=xarray_open_kwargs)

CPU times: user 44.8 s, sys: 4.4 s, total: 49.2 s
Wall time: 49.2 s


# ARD - write zarr & chunk & write zarr

In [22]:
# Output directory and store name for the analysis-ready (ARD) zarr collection.
# NOTE(review): the store name advertises "chunk27-10-1500-3600", which is not the
# chunking DS has when it is written below — confirm the name is still accurate.
BRAN2020_ard_path = '/scratch/es60/ard/reanalysis/BRAN2020/ARD/'
ard_file_ID = 'BRAN2020-daily-temp-chunk27-10-1500-3600-v25042024.zarr'

In [23]:
# Cast `temp` to float32 explicitly (the source files store it as packed
# `short` with scale_factor/add_offset, per the header dump).
DS['temp']=DS['temp'].astype('float32')
# Display the lazy dataset.
DS

Unnamed: 0,Array,Chunk
Bytes,11.16 TiB,351.56 kiB
Shape,"(11138, 51, 1500, 3600)","(1, 1, 300, 300)"
Dask graph,34082280 chunks in 733 graph layers,34082280 chunks in 733 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 11.16 TiB 351.56 kiB Shape (11138, 51, 1500, 3600) (1, 1, 300, 300) Dask graph 34082280 chunks in 733 graph layers Data type float32 numpy.ndarray",11138  1  3600  1500  51,

Unnamed: 0,Array,Chunk
Bytes,11.16 TiB,351.56 kiB
Shape,"(11138, 51, 1500, 3600)","(1, 1, 300, 300)"
Dask graph,34082280 chunks in 733 graph layers,34082280 chunks in 733 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [29]:
single_ds = xr.open_mfdataset('/g/data/gb6/BRAN/BRAN2020/daily/ocean_temp_2023_01.nc',parallel=True)
single_ds

Unnamed: 0,Array,Chunk
Bytes,248 B,8 B
Shape,"(31,)","(1,)"
Dask graph,31 chunks in 2 graph layers,31 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 248 B 8 B Shape (31,) (1,) Dask graph 31 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",31  1,

Unnamed: 0,Array,Chunk
Bytes,248 B,8 B
Shape,"(31,)","(1,)"
Dask graph,31 chunks in 2 graph layers,31 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,248 B,8 B
Shape,"(31,)","(1,)"
Dask graph,31 chunks in 2 graph layers,31 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 248 B 8 B Shape (31,) (1,) Dask graph 31 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",31  1,

Unnamed: 0,Array,Chunk
Bytes,248 B,8 B
Shape,"(31,)","(1,)"
Dask graph,31 chunks in 2 graph layers,31 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,248 B,8 B
Shape,"(31,)","(1,)"
Dask graph,31 chunks in 2 graph layers,31 chunks in 2 graph layers
Data type,timedelta64[ns] numpy.ndarray,timedelta64[ns] numpy.ndarray
"Array Chunk Bytes 248 B 8 B Shape (31,) (1,) Dask graph 31 chunks in 2 graph layers Data type timedelta64[ns] numpy.ndarray",31  1,

Unnamed: 0,Array,Chunk
Bytes,248 B,8 B
Shape,"(31,)","(1,)"
Dask graph,31 chunks in 2 graph layers,31 chunks in 2 graph layers
Data type,timedelta64[ns] numpy.ndarray,timedelta64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,496 B,16 B
Shape,"(31, 2)","(1, 2)"
Dask graph,31 chunks in 2 graph layers,31 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 496 B 16 B Shape (31, 2) (1, 2) Dask graph 31 chunks in 2 graph layers Data type float64 numpy.ndarray",2  31,

Unnamed: 0,Array,Chunk
Bytes,496 B,16 B
Shape,"(31, 2)","(1, 2)"
Dask graph,31 chunks in 2 graph layers,31 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,31.80 GiB,351.56 kiB
Shape,"(31, 51, 1500, 3600)","(1, 1, 300, 300)"
Dask graph,94860 chunks in 2 graph layers,94860 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 31.80 GiB 351.56 kiB Shape (31, 51, 1500, 3600) (1, 1, 300, 300) Dask graph 94860 chunks in 2 graph layers Data type float32 numpy.ndarray",31  1  3600  1500  51,

Unnamed: 0,Array,Chunk
Bytes,31.80 GiB,351.56 kiB
Shape,"(31, 51, 1500, 3600)","(1, 1, 300, 300)"
Dask graph,94860 chunks in 2 graph layers,94860 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [None]:
%%time
# Write the lazy dataset to the ARD zarr store with consolidated metadata.
# NOTE(review): DS is chunked (1, 1, 300, 300) in the repr shown earlier, while the
# reloaded store reports (1, 10, 1500, 3600) — presumably a rechunk step was run
# before this write in the original session; confirm before re-running.
DS.to_zarr(BRAN2020_ard_path+ard_file_ID,consolidated=True)
# Timings recorded from earlier runs:
# ----- Client(threads_per_worker=1) -------
# CPU times: user 2min 26s, sys: 29.9 s, total: 2min 56s
# Wall time: 13min 1s
# ----- Client() -------
# CPU times: user 1min 53s, sys: 18.1 s, total: 2min 11s
# Wall time: 14min 3s

In [None]:
!touch /scratch/es60/ard/reanalysis/BRAN2020/finished_BRAN2020-daily-temp-chunk27-10-1500-3600-v25042024.log

## RELOAD in ARD collection

In [4]:
BRAN2020_daily = xr.open_zarr(BRAN2020_ard_path+ard_file_ID,consolidated=True)

In [5]:
BRAN2020_daily

Unnamed: 0,Array,Chunk
Bytes,11.16 TiB,205.99 MiB
Shape,"(11138, 51, 1500, 3600)","(1, 10, 1500, 3600)"
Dask graph,66828 chunks in 2 graph layers,66828 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 11.16 TiB 205.99 MiB Shape (11138, 51, 1500, 3600) (1, 10, 1500, 3600) Dask graph 66828 chunks in 2 graph layers Data type float32 numpy.ndarray",11138  1  3600  1500  51,

Unnamed: 0,Array,Chunk
Bytes,11.16 TiB,205.99 MiB
Shape,"(11138, 51, 1500, 3600)","(1, 10, 1500, 3600)"
Dask graph,66828 chunks in 2 graph layers,66828 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


# rechunker

In [33]:
# Small test subset: the first 366 time steps of the daily collection.
# NOTE(review): the rechunk cells that follow operate on the full dataset, not
# on `test_ds` — confirm whether the subset was meant to be used.
test_ds = BRAN2020_daily.isel({'Time':slice(0,366)})
test_ds

Unnamed: 0,Array,Chunk
Bytes,375.50 GiB,205.99 MiB
Shape,"(366, 51, 1500, 3600)","(1, 10, 1500, 3600)"
Dask graph,2196 chunks in 3 graph layers,2196 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 375.50 GiB 205.99 MiB Shape (366, 51, 1500, 3600) (1, 10, 1500, 3600) Dask graph 2196 chunks in 3 graph layers Data type float32 numpy.ndarray",366  1  3600  1500  51,

Unnamed: 0,Array,Chunk
Bytes,375.50 GiB,205.99 MiB
Shape,"(366, 51, 1500, 3600)","(1, 10, 1500, 3600)"
Dask graph,2196 chunks in 3 graph layers,2196 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


##### "Consider either rechunking using `chunk()`, deleting or modifying `encoding['chunks']`"

In [15]:
def remove_zarr_encoding(DS):
    """Reset the ``encoding`` of every variable and coordinate of ``DS``.

    Each entry yielded by iterating ``DS``, followed by each entry of
    ``DS.coords``, has its ``encoding`` replaced by an empty dict. The change
    is made in place and the same object is returned for convenience.
    """
    names = list(DS) + list(DS.coords)
    for name in names:
        DS[name].encoding = {}
    return DS

In [39]:
ds = remove_zarr_encoding(BRAN2020_daily)

In [40]:
chunking_dict={'Time':-1,'st_ocean':1,'xt_ocean':360,'yt_ocean':150}
#chunking_dict={'Time':-1,'st_ocean':1,'xt_ocean':10,'yt_ocean':10}

In [41]:
var='temp'

In [42]:
# Encode the target chunking in the store name,
# e.g. 'chunks_Time-1st_ocean1xt_ocean360yt_ocean150'
chunk_tokens = [str(dim) + str(size) for dim, size in chunking_dict.items()]
chunking_string = 'chunks_' + ''.join(chunk_tokens)
ard_rcTime_file_ID = '-'.join(['BRAN2020-daily', var, chunking_string, 'v25042024.zarr'])

In [44]:
from rechunker import rechunk

# For an xarray Dataset, rechunker expects `target_chunks` to be keyed by
# *variable name*; each value is that variable's per-dimension chunk dict, or
# None to leave the array's chunking untouched. Passing the per-dimension dict
# directly would be read as variables named 'Time', 'st_ocean', ... and the
# requested chunking would never be applied to the data variable.
target_chunks = {name: None for name in ds.variables}
target_chunks[var] = chunking_dict

target_store = BRAN2020_ard_path+ard_rcTime_file_ID
temp_store = BRAN2020_ard_path+'temp_store'  # intermediate store used by rechunker

# Build the rechunking plan; it is executed in the next cell.
rechunk_plan = rechunk(ds, target_chunks, target_store=target_store, temp_store=temp_store, max_mem='4GB')

In [None]:
%%time
result = rechunk_plan.execute()

In [None]:
import zarr
zarr.consolidate_metadata(BRAN2020_ard_path+ard_rcTime_file_ID)

In [None]:
xr.open_zarr(BRAN2020_ard_path+ard_rcTime_file_ID,consolidated=True)

In [None]:
ard_rcTime_file_ID

In [None]:
# Alternative to rechunker: ask dask/xarray to rechunk lazily so that each
# chunk holds the full Time axis (see `chunking_dict`).
BRAN2020_daily_rcTime =  BRAN2020_daily.chunk(chunking_dict)
BRAN2020_daily_rcTime

In [None]:
BRAN2020_daily_rcTime = remove_zarr_encoding(BRAN2020_daily_rcTime)

In [None]:
%%time
# Write the time-rechunked dataset to zarr.
# NOTE(review): this targets the same store path as the rechunker plan above;
# presumably only one of the two routes is meant to be run — confirm.
BRAN2020_daily_rcTime.to_zarr(BRAN2020_ard_path+ard_rcTime_file_ID,consolidated=True)

In [None]:
!touch /scratch/es60/ard/reanalysis/BRAN2020/ARD/logs/finished_BRAN2020-daily-temp-chunks_Time-1st_ocean1xt_ocean360yt_ocean150-v25042024-zarr.log

## end ARD workflow $\Uparrow$

# begin post-processing workflow $\Downarrow$

## load in both zarr collections

In [None]:
# NOTE: these assignments rebind the path/ID variables used by the daily ARD
# workflow above so that they now point at the *monthly* temp stores.
BRAN2020_ard_path = '/scratch/es60/ard/reanalysis/BRAN2020/'
ard_file_ID = 'BRAN2020-monthly-temp-v13112023.zarr'
ard_rcTime_file_ID = 'BRAN2020-monthly-temp-chunk4time-v13112023.zarr'

In [None]:
temp_chunked_time = xr.open_zarr(BRAN2020_ard_path + ard_rcTime_file_ID,consolidated=True)
temp_chunked = xr.open_zarr(BRAN2020_ard_path + ard_file_ID,consolidated=True)

## basic functions

In [None]:
def get_monthly_climatology(xr_object,time_coord_name = 'time',flox=True):
    """Mean of ``xr_object`` for each calendar month.

    Groups by ``<time_coord_name>.month`` and averages over ``time_coord_name``
    with ``keep_attrs=True``. When ``flox`` is True the reduction is also given
    ``method="cohorts"`` and ``engine="flox"``.
    """
    month_key = time_coord_name + '.month'
    reduce_kwargs = {'dim': time_coord_name, 'keep_attrs': True}
    if flox == True:
        reduce_kwargs.update(method="cohorts", engine="flox")
    return xr_object.groupby(month_key).mean(**reduce_kwargs)

def get_monthly_anomaly(xr_object,monthly_climatology, time_coord_name = 'time'):
    """Subtract a monthly climatology from ``xr_object``.

    ``xr_object`` is grouped by ``<time_coord_name>.month`` and
    ``monthly_climatology`` is subtracted from the grouped object.
    """
    month_key = time_coord_name + '.month'
    grouped_by_month = xr_object.groupby(month_key)
    return grouped_by_month - monthly_climatology


# Climatology

In [None]:
temp_monthly_climatology = get_monthly_climatology(temp_chunked_time, time_coord_name = 'Time')
temp_monthly_climatology

In [None]:
# Lazily rechunk the climatology: 10 depth levels x 3600 xt_ocean points x 1 month per chunk.
temp_monthly_climatology_rc = temp_monthly_climatology.chunk({'st_ocean':10,'xt_ocean':3600,'month':1})

In [None]:
%%time
# Materialise the monthly climatology in memory.
# NOTE(review): the anomaly cell below is given `temp_monthly_climatology_rc`
# (the lazy, rechunked object), not this computed result — confirm that is intended.
temp_monthly_climatology = temp_monthly_climatology.compute()

# Timing recorded from an earlier run:
# CPU times: user 32.1 s, sys: 17.9 s, total: 49.9 s
# Wall time: 2min 20s
#

# Anomaly

In [None]:
temp_anomaly = get_monthly_anomaly(temp_chunked,temp_monthly_climatology_rc,time_coord_name='Time')

In [None]:
temp_anomaly

# define El Nino and La Nina using NCAR ONI data

In [None]:
# Load the NCAR ONI table and index it by timestamp
ONI_DF = pd.read_csv('/g/data/xv83/users/tm4888/data/ENSO/NCAR_ONI.csv').set_index('datetime')
ONI_DF.index = pd.to_datetime(ONI_DF.index)

# ONI thresholds used to flag El Nino / La Nina months
el_nino_threshold = 0.5
la_nina_threshold = -0.5
el_nino_threshold_months = ONI_DF["ONI"].ge(el_nino_threshold)
la_nina_threshold_months = ONI_DF["ONI"].le(la_nina_threshold)

# Append the threshold flags plus a run ID per flag: the ID increments every
# time the flag changes value, so consecutive months with the same state share an ID.
ONI_DF = pd.concat(
    [
        ONI_DF,
        el_nino_threshold_months.rename('El Nino threshold'),
        la_nina_threshold_months.rename('La Nina threshold'),
        el_nino_threshold_months.diff().ne(0).cumsum().rename('El Nino event group ID'),
        la_nina_threshold_months.diff().ne(0).cumsum().rename('La Nina event group ID'),
    ],
    axis=1,
)

In [None]:
# Keep ONI values only for El Nino *events*: runs (same event group ID) that are
# at least 5 rows long. `dropna=False` keeps the original index and leaves NaN for
# rejected runs; `.where(...)` then blanks the long runs that are below threshold.
El_Nino_Series = ONI_DF.groupby('El Nino event group ID')['ONI'].filter(lambda x: len(x) >= 5,dropna=False).where(ONI_DF['El Nino threshold'] == True)
ONI_DF = pd.concat([ONI_DF, El_Nino_Series.rename('El Nino')], axis=1)
# Same procedure for La Nina events.
La_Nina_Series = ONI_DF.groupby('La Nina event group ID')['ONI'].filter(lambda x: len(x) >= 5,dropna=False).where(ONI_DF['La Nina threshold'] == True)
ONI_DF = pd.concat([ONI_DF, La_Nina_Series.rename('La Nina')], axis=1)
ONI_DF

In [None]:
# Time series of ONI restricted to the qualifying El Nino events
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 4), layout='constrained')
ax.plot(ONI_DF.index, ONI_DF['El Nino'])

# One major tick every second year, labelled with the four-digit year
year_locator = mdates.YearLocator(base=2)
year_formatter = mdates.DateFormatter('%Y')
ax.xaxis.set_major_locator(year_locator)
ax.xaxis.set_major_formatter(year_formatter)
for tick_label in ax.get_xticklabels(which='major'):
    tick_label.set(rotation=30, horizontalalignment='right')

plt.title('ONI El Nino events')
plt.show()

In [None]:
# Time series of ONI restricted to the qualifying La Nina events
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 4), layout='constrained')
ax.plot(ONI_DF.index, ONI_DF['La Nina'])

# One major tick every second year, labelled with the four-digit year
year_locator = mdates.YearLocator(base=2)
year_formatter = mdates.DateFormatter('%Y')
ax.xaxis.set_major_locator(year_locator)
ax.xaxis.set_major_formatter(year_formatter)
for tick_label in ax.get_xticklabels(which='major'):
    tick_label.set(rotation=30, horizontalalignment='right')

plt.title('ONI La Nina events')
plt.show()

## heatmap

### make table for average over certain region

In [None]:
%%time
max_lat = 5.0
min_lat = -5.0
max_lon = 240
min_lon = 190
LatIndexer, LonIndexer = 'yt_ocean', 'xt_ocean'
SliceData = temp_anomaly.temp.isel(st_ocean=0).sel(**{LatIndexer: slice(min_lat, max_lat),
                        LonIndexer: slice(min_lon, max_lon)})
SliceData = SliceData.chunk({'Time':-1,'yt_ocean':-1,'xt_ocean':-1,})
# THIS CURRENTLY IGNORES GRID AREA CORRECTION 
spatial_mean_of_ONI_anomaly = SliceData.mean({'yt_ocean','xt_ocean'})
spatial_mean_of_ONI_anomaly = spatial_mean_of_ONI_anomaly.compute()

In [None]:
spatial_mean_of_ONI_anomaly.plot()

In [None]:
# Tabulate the box-mean anomaly and tag every row with its calendar year
spatial_mean_of_ONI_anomaly_DF = spatial_mean_of_ONI_anomaly.to_dataframe()
row_years = pd.DatetimeIndex(spatial_mean_of_ONI_anomaly_DF.index).year
spatial_mean_of_ONI_anomaly_DF = spatial_mean_of_ONI_anomaly_DF.assign(year=row_years)
spatial_mean_of_ONI_anomaly_DF

In [None]:
heatmap_data = spatial_mean_of_ONI_anomaly_DF.pivot_table(index='month', columns='year', values='temp')

In [None]:
heatmap_data

In [None]:
plt.figure(figsize=(20, 10))
sns.heatmap(heatmap_data, cmap='RdBu_r', square=True,vmin=-2.5, vmax=2.5,linewidth=.5)
plt.title('Month by Year BRAN2020 anomalies in ONI (Nino3.4) region')
plt.show()

## stacked heatmap plot

In [None]:
ONI_DF_BRANtime = ONI_DF['1993-01':'2022-12']

In [None]:
# Month-by-year tables of ONI values during El Nino / La Nina events; cells with
# no event are filled with '' so they can be used as blank heatmap annotations.
# NOTE(review): relies on ONI_DF having 'month' and 'year' columns — presumably
# supplied by the CSV; confirm.
event_data_EN = ONI_DF_BRANtime.pivot_table(index='month', columns='year', values='El Nino',dropna=False).fillna('')
event_data_LN = ONI_DF_BRANtime.pivot_table(index='month', columns='year', values='La Nina',dropna=False).fillna('')

In [None]:
plt.figure(figsize=(20,8))
# First pass draws the coloured anomaly grid and its colourbar.
sns.heatmap(heatmap_data, annot=False,cmap='RdBu_r', square=True,vmin=-2.5, vmax=2.5,linewidth=.5)
# Second and third passes redraw the same grid (no colourbar) only to overlay
# text annotations: La Nina ONI values at the top of each cell, El Nino at the bottom.
sns.heatmap(heatmap_data, annot=event_data_LN, annot_kws={'va':'top'}, fmt="", cbar=False,cmap='RdBu_r', square=True,vmin=-2.5, vmax=2.5,linewidth=.5)
sns.heatmap(heatmap_data, annot=event_data_EN, annot_kws={'va':'bottom'}, fmt="", cbar=False,cmap='RdBu_r', square=True,vmin=-2.5, vmax=2.5,linewidth=.5)
plt.title('Month by Year BRAN2020 anomalies in ONI (Nino3.4) region\n ONI values shown for defined ENSO events')

#### count of events in BRAN2020 period
El Nino = 8 events ( weak - strong ) as defined by ONI<br>
La Nina = 10 events ( weak - strong ) as defined by ONI<br>
El Nino months total = 70 = 19%<br>
La Nina months total = 119 = 33%<br>
Neutral months total = 171 = 48%


In [None]:
ONI_DF_BRANtime 

# filter BRAN2020 data by ENSO

In [None]:
# Restrict the ONI table to the BRAN2020 period (Jan 1993 - Dec 2022).
# `.copy()` makes this an independent DataFrame, so adding columns below does
# not trigger pandas' SettingWithCopyWarning or write into a slice of ONI_DF.
ONI_DF_BRANtime = ONI_DF.loc['1993-01':'2022-12'].copy()
# Boolean flags: True where the month belongs to a defined El Nino / La Nina event
ONI_DF_BRANtime['El Nino LOGICAL'] = ONI_DF_BRANtime['El Nino'].notnull()
ONI_DF_BRANtime['La Nina LOGICAL'] = ONI_DF_BRANtime['La Nina'].notnull()
ONI_DF_BRANtime

In [None]:
# El Nino flag as an xarray object with its index dimension renamed to 'Time'
El_Nino_mask = (
    ONI_DF_BRANtime['El Nino LOGICAL']
    .to_xarray()
    .rename({'datetime':'Time'})
)
# Replace the ONI timestamps with the dataset's own Time coordinate
sync_Time = temp_chunked_time.Time
El_Nino_mask['Time'] = sync_Time

In [None]:
# La Nina flag as an xarray object with its index dimension renamed to 'Time'
La_Nina_mask = (
    ONI_DF_BRANtime['La Nina LOGICAL']
    .to_xarray()
    .rename({'datetime':'Time'})
)
# Replace the ONI timestamps with the dataset's own Time coordinate
sync_Time = temp_chunked_time.Time
La_Nina_mask['Time'] = sync_Time

In [None]:
# A month is neutral when it is flagged as neither El Nino nor La Nina
in_enso_event = ONI_DF_BRANtime['El Nino LOGICAL'] | ONI_DF_BRANtime['La Nina LOGICAL']
ONI_DF_BRANtime['Neutral LOGICAL'] = ~in_enso_event

In [None]:
ONI_DF_BRANtime['El Nino LOGICAL'].sum()

In [None]:
ONI_DF_BRANtime['La Nina LOGICAL'].sum()

In [None]:
ONI_DF_BRANtime['Neutral LOGICAL'].sum()

# Test if they add to 360

In [None]:
ONI_DF_BRANtime['El Nino LOGICAL'].sum() + ONI_DF_BRANtime['La Nina LOGICAL'].sum()+ONI_DF_BRANtime['Neutral LOGICAL'].sum() == 360

In [None]:
ONI_DF_BRANtime

In [None]:
El_Nino_BRAN2020_temp = temp_chunked_time.temp.where(El_Nino_mask)
La_Nina_BRAN2020_temp = temp_chunked_time.temp.where(La_Nina_mask)

In [None]:
%%time
El_Nino_SST_mean = El_Nino_BRAN2020_temp.isel(st_ocean=0).mean('Time').compute()
La_Nina_SST_mean = La_Nina_BRAN2020_temp.isel(st_ocean=0).mean('Time').compute()

In [None]:
plt.figure(figsize=(16,8))
(El_Nino_SST_mean-La_Nina_SST_mean).plot()
plt.title('Mean monthly BRAN2020 SST for El Nino - La Nina events')

## El Nino "climatology"

In [None]:
%%time
# Monthly climatology built from El Nino months only (other months are masked out
# by the `.where(El_Nino_mask)` applied earlier).
El_Nino_temp_monthly_climatology = get_monthly_climatology(El_Nino_BRAN2020_temp, time_coord_name = 'Time')
# Lazy, rechunked handle on the same climatology.
El_Nino_temp_monthly_climatology_rc = El_Nino_temp_monthly_climatology.chunk({'st_ocean':10,'xt_ocean':3600,'month':1})
# Load the climatology into memory.
El_Nino_temp_monthly_climatology = El_Nino_temp_monthly_climatology.compute()


## La Nina "climatology"

In [None]:
%%time
# Monthly climatology built from La Nina months only (other months are masked out
# by the `.where(La_Nina_mask)` applied earlier).
La_Nina_temp_monthly_climatology = get_monthly_climatology(La_Nina_BRAN2020_temp, time_coord_name = 'Time')
# Lazy, rechunked handle on the same climatology.
La_Nina_temp_monthly_climatology_rc = La_Nina_temp_monthly_climatology.chunk({'st_ocean':10,'xt_ocean':3600,'month':1})
# Load the climatology into memory.
La_Nina_temp_monthly_climatology = La_Nina_temp_monthly_climatology.compute()

In [None]:
El_Nino_temp_monthly_climatology

In [None]:
diff_EN_LN_climatology = El_Nino_temp_monthly_climatology - La_Nina_temp_monthly_climatology

In [None]:
diff_EN_LN_climatology

In [None]:
%%time
max_lat = 5.0
min_lat = -5.0
max_lon = 240
min_lon = 190
LatIndexer, LonIndexer = 'yt_ocean', 'xt_ocean'
SliceData = diff_EN_LN_climatology.isel(st_ocean=0).sel(**{LatIndexer: slice(min_lat, max_lat),
                        LonIndexer: slice(min_lon, max_lon)})
SliceData = SliceData.chunk({'month':-1,'yt_ocean':-1,'xt_ocean':-1,})
# THIS CURRENTLY IGNORES GRID AREA CORRECTION 
spatial_mean_diff_EN_LN = SliceData.mean({'yt_ocean','xt_ocean'})
spatial_mean_diff_EN_LN = spatial_mean_diff_EN_LN.compute()

In [None]:
spatial_mean_diff_EN_LN.plot(ylim=[0,3])

In [None]:
%%time
# Bounds of the averaging box (degrees); same box as the anomaly cells above.
max_lat = 5.0
min_lat = -5.0
max_lon = 240
min_lon = 190
LatIndexer, LonIndexer = 'yt_ocean', 'xt_ocean'
# Top model level of the El Nino climatology, cut down to the box.
SliceData = El_Nino_temp_monthly_climatology.isel(st_ocean=0).sel(**{LatIndexer: slice(min_lat, max_lat),
                        LonIndexer: slice(min_lon, max_lon)})
# Gather the box into a single chunk before reducing.
SliceData = SliceData.chunk({'month':-1,'yt_ocean':-1,'xt_ocean':-1,})
# THIS CURRENTLY IGNORES GRID AREA CORRECTION
# Unweighted mean over latitude and longitude, one value per month.
spatial_mean_EN = SliceData.mean({'yt_ocean','xt_ocean'})
spatial_mean_EN = spatial_mean_EN.compute()
spatial_mean_EN.plot()

# Mean, Median, Max , Min

In [None]:
# Summary statistics over the Time dimension of the El Nino-masked temperature.
El_Nino_mean = El_Nino_BRAN2020_temp.mean('Time')
El_Nino_median = El_Nino_BRAN2020_temp.median('Time')
El_Nino_max = El_Nino_BRAN2020_temp.max('Time')
El_Nino_min = El_Nino_BRAN2020_temp.min('Time')
El_Nino_std = El_Nino_BRAN2020_temp.std('Time')
# 5th and 95th percentiles over Time, skipping NaN (i.e. the masked-out months).
El_Nino_quant = El_Nino_BRAN2020_temp.quantile([0.05,0.95],skipna=True,dim='Time')

In [None]:
%%time
El_Nino_quant.isel(quantile=0).isel(st_ocean=0).plot(robust=True,vmin=-1,vmax=35)

In [None]:
%%time
El_Nino_quant.isel(quantile=1).isel(st_ocean=0).plot(robust=True,vmin=-1,vmax=35)

In [None]:
def get_slice(xr_object, min_lat=-5.0, max_lat=5.0, min_lon=190, max_lon=240,
              lat_dim='yt_ocean', lon_dim='xt_ocean'):
    """Return the latitude/longitude box subset of ``xr_object``.

    Parameters
    ----------
    xr_object : object exposing ``.sel(**indexers)`` (e.g. an xarray object)
        Data to subset.
    min_lat, max_lat : float
        Latitude bounds of the box. Defaults: -5.0 and 5.0.
    min_lon, max_lon : float
        Longitude bounds of the box. Defaults: 190 and 240.
    lat_dim, lon_dim : str
        Names of the latitude and longitude dimensions.
        Defaults: 'yt_ocean' and 'xt_ocean'.

    Returns
    -------
    The result of ``xr_object.sel`` with a slice on each of the two dimensions.
    """
    return xr_object.sel(**{lat_dim: slice(min_lat, max_lat),
                            lon_dim: slice(min_lon, max_lon)})

In [None]:
El_Nino_min.isel(st_ocean=0).plot(robust=True,vmin=-1,vmax=35)

In [None]:
El_Nino_max.isel(st_ocean=0).plot(robust=True,vmin=-1,vmax=35)

In [None]:
diff = El_Nino_max.isel(st_ocean=0) - El_Nino_min.isel(st_ocean=0) 
diff.plot(robust=True)

In [None]:
El_Nino_std.isel(st_ocean=0).plot(robust=True)

# The End

In [None]:
client.shutdown()

## Plot current vectors for August

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
import matplotlib.ticker as ticker
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy
from matplotlib import mlab, cm, gridspec
import matplotlib.ticker as mticker
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
%matplotlib inline 

In [None]:
%%time
# NOTE(review): `clim_uv` is not defined anywhere in the visible notebook —
# presumably a monthly u/v climatology from another session; confirm before running.
# Define the u and v components of the currents
time_choice = 8
u = clim_uv.u.sel(month=time_choice)
v = clim_uv.v.sel(month=time_choice)
# Current speed from the two velocity components
speed = np.sqrt(u**2 + v**2)

In [None]:
#plot model data
transform = ccrs.PlateCarree()
cmap = 'Spectral_r'
cbar_label='current speed'
plot_data = speed

###
fig = plt.figure(num=None, figsize=(8, 6), dpi=300, facecolor='w', edgecolor='k')
ax = plt.subplot(projection=ccrs.PlateCarree(180))
ax.set_extent([142,160, -25, -10], ccrs.PlateCarree())
ax.add_feature(cfeature.NaturalEarthFeature('physical', 'land', '50m', edgecolor='face', facecolor='white'))
ax.coastlines('50m',linewidth=0.5,edgecolor='grey')
plot_data.plot(transform=transform,cmap=cmap,cbar_kwargs={'label': cbar_label,'shrink':0.5},robust=True)

#plot u/v vectors
# Define the x and y coordinates
x = clim_uv.xu_ocean
y = clim_uv.yu_ocean
ax.quiver(x.values,y.values,u.values,v.values,transform=transform, units='x', width=0.01, scale=0.7, headwidth=2,alpha=0.2)
ax.set_title('BRAN2020 1993-2022\ncurrent speed \n August Climatology')