In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import xarray as xr
import numpy as np
import geopandas as gp

from unseen import fileio
from unseen import time_utils

Matplotlib is building the font cache; this may take a moment.


## Observations

In [2]:
agcd_file = "/g/data/xv83/dbi599/ag/data/pr_agcd_1900-2019_A-DEC-mean_wheat-sheep-mean.zarr.zip"

In [3]:
agcd_ds = fileio.open_dataset(agcd_file)

In [4]:
agcd_ds

Unnamed: 0,Array,Chunk
Bytes,3.78 kiB,32 B
Shape,"(121, 4)","(1, 4)"
Count,122 Tasks,121 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.78 kiB 32 B Shape (121, 4) (1, 4) Count 122 Tasks 121 Chunks Type float64 numpy.ndarray",4  121,

Unnamed: 0,Array,Chunk
Bytes,3.78 kiB,32 B
Shape,"(121, 4)","(1, 4)"
Count,122 Tasks,121 Chunks
Type,float64,numpy.ndarray


In [5]:
agcd_ds['pr'] = agcd_ds['pr'] * 365
agcd_ds['pr'].attrs['units'] = 'mm yr-1'

In [6]:
years = agcd_ds['time'].dt.year.values
agcd_df = pd.DataFrame(index=years)
agcd_df['all'] = agcd_ds['pr'].sel(region='all').values
agcd_df['south-west'] = agcd_ds['pr'].sel(region='south-west').values
agcd_df['south-east'] = agcd_ds['pr'].sel(region='south-east').values
agcd_df['north-east'] = agcd_ds['pr'].sel(region='north-east').values

In [7]:
agcd_df

Unnamed: 0,all,south-west,south-east,north-east
1900,464.811006,475.969258,364.933123,527.053421
1901,400.237197,350.454185,303.321759,504.458390
1902,311.070922,375.835685,241.523708,313.200854
1903,545.881173,426.354388,402.868296,732.982318
1904,497.322876,504.010219,337.707338,604.851234
...,...,...,...,...
2016,580.077608,463.919797,533.637226,696.759149
2017,470.199229,432.314060,365.637155,571.200521
2018,357.549319,377.360680,240.977893,425.286678
2019,246.779661,265.421957,217.078133,254.208977


In [8]:
agcd_df.quantile(0.33)

all           451.652868
south-west    382.220147
south-east    338.208430
north-east    551.451692
Name: 0.33, dtype: float64

In [9]:
agcd_df.quantile(0.66)

all           520.235589
south-west    436.211417
south-east    401.178648
north-east    670.629388
Name: 0.66, dtype: float64

In [10]:
agcd_terciles_df = pd.DataFrame(index=years)
agcd_terciles_df['south-east'] = pd.qcut(agcd_df['south-east'], q=3, labels=['dry', 'normal', 'wet'])
agcd_terciles_df['south-west'] = pd.qcut(agcd_df['south-west'], q=3, labels=['dry', 'normal', 'wet'])
agcd_terciles_df['north-east'] = pd.qcut(agcd_df['north-east'], q=3, labels=['dry', 'normal', 'wet'])

In [11]:
agcd_terciles_df[-30:]

Unnamed: 0,south-east,south-west,north-east
1991,dry,dry,normal
1992,wet,wet,normal
1993,wet,normal,dry
1994,dry,dry,dry
1995,wet,wet,normal
1996,normal,normal,wet
1997,dry,normal,normal
1998,normal,normal,wet
1999,wet,wet,wet
2000,wet,wet,normal


In [12]:
agcd_terciles_df.groupby(['south-west', 'south-east', 'north-east']).size()

south-west  south-east  north-east
dry         dry         dry           15
                        normal         5
                        wet            1
            normal      dry            2
                        normal         7
                        wet            3
            wet         dry            0
                        normal         3
                        wet            4
normal      dry         dry            7
                        normal         4
                        wet            2
            normal      dry            3
                        normal        10
                        wet            3
            wet         dry            3
                        normal         1
                        wet            7
wet         dry         dry            3
                        normal         3
                        wet            0
            normal      dry            7
                        normal         1
                      

In [13]:
agcd_terciles_pct = (agcd_terciles_df.groupby(['south-west', 'south-east', 'north-east']).size() / 120) * 100
agcd_terciles_pct.round(1)

south-west  south-east  north-east
dry         dry         dry           12.5
                        normal         4.2
                        wet            0.8
            normal      dry            1.7
                        normal         5.8
                        wet            2.5
            wet         dry            0.0
                        normal         2.5
                        wet            3.3
normal      dry         dry            5.8
                        normal         3.3
                        wet            1.7
            normal      dry            2.5
                        normal         8.3
                        wet            2.5
            wet         dry            2.5
                        normal         0.8
                        wet            5.8
wet         dry         dry            2.5
                        normal         2.5
                        wet            0.0
            normal      dry            5.8
                   

In [14]:
all_dry = (agcd_terciles_df['south-west'] == 'dry') & (agcd_terciles_df['south-east'] == 'dry') & (agcd_terciles_df['north-east'] == 'dry')

In [15]:
agcd_terciles_df[all_dry].index.values

array([1901, 1902, 1922, 1937, 1940, 1944, 1948, 1957, 1972, 1980, 1994,
       2002, 2009, 2018, 2019])

In [16]:
all_wet = (agcd_terciles_df['south-west'] == 'wet') & (agcd_terciles_df['south-east'] == 'wet') & (agcd_terciles_df['north-east'] == 'wet')

In [17]:
agcd_terciles_df[all_wet].index.values

array([1917, 1920, 1921, 1931, 1955, 1958, 1963, 1971, 1973, 1974, 1975,
       1978, 1988, 1999, 2011, 2016])

You can pick out the dry-dry-dry and wet-wet-wet years on the BoM [121 Years of Australian Rainfall](http://www.bom.gov.au/climate/history/rainfall/) page.

## Forecast data

In [18]:
cafe_bc_file = "/g/data/xv83/dbi599/ag/data/pr_cafe-c5-d60-pX-f6_19950501-20191101_A-DEC-mean_wheat-sheep-mean_bias-corrected-agcd-additive.zarr.zip"

In [19]:
cafe_bc_ds = fileio.open_dataset(cafe_bc_file)

In [20]:
cafe_bc_ds

Unnamed: 0,Array,Chunk
Bytes,4.47 kiB,4.47 kiB
Shape,"(11, 52)","(11, 52)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 4.47 kiB 4.47 kiB Shape (11, 52) (11, 52) Count 2 Tasks 1 Chunks Type object numpy.ndarray",52  11,

Unnamed: 0,Array,Chunk
Bytes,4.47 kiB,4.47 kiB
Shape,"(11, 52)","(11, 52)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.68 MiB,33.00 kiB
Shape,"(52, 11, 96, 4)","(1, 11, 96, 4)"
Count,53 Tasks,52 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.68 MiB 33.00 kiB Shape (52, 11, 96, 4) (1, 11, 96, 4) Count 53 Tasks 52 Chunks Type float64 numpy.ndarray",52  1  4  96  11,

Unnamed: 0,Array,Chunk
Bytes,1.68 MiB,33.00 kiB
Shape,"(52, 11, 96, 4)","(1, 11, 96, 4)"
Count,53 Tasks,52 Chunks
Type,float64,numpy.ndarray


In [21]:
cafe_bc_ds['pr'] = cafe_bc_ds['pr'] * 365
cafe_bc_ds['pr'].attrs['units'] = 'mm yr-1'

In [22]:
cafe_samples = cafe_bc_ds['pr'].sel(lead_time=slice(3, None)).stack({'sample': ['ensemble', 'init_date', 'lead_time']})

In [25]:
cafe_samples

Unnamed: 0,Array,Chunk
Bytes,1.22 MiB,39.00 kiB
Shape,"(4, 39936)","(4, 1248)"
Count,492 Tasks,94 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.22 MiB 39.00 kiB Shape (4, 39936) (4, 1248) Count 492 Tasks 94 Chunks Type float64 numpy.ndarray",39936  4,

Unnamed: 0,Array,Chunk
Bytes,1.22 MiB,39.00 kiB
Shape,"(4, 39936)","(4, 1248)"
Count,492 Tasks,94 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,312.00 kiB,312.00 kiB
Shape,"(39936,)","(39936,)"
Count,7 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 312.00 kiB 312.00 kiB Shape (39936,) (39936,) Count 7 Tasks 1 Chunks Type object numpy.ndarray",39936  1,

Unnamed: 0,Array,Chunk
Bytes,312.00 kiB,312.00 kiB
Shape,"(39936,)","(39936,)"
Count,7 Tasks,1 Chunks
Type,object,numpy.ndarray


In [26]:
cafe_samples = time_utils.select_time_period(cafe_samples, ['1995-01-01', '2019-12-31'])

In [28]:
cafe_samples = cafe_samples.dropna(dim='sample')

In [30]:
nsamples = cafe_samples['sample'].shape[0]

In [31]:
nsamples

25536

In [32]:
samples = np.arange(nsamples) + 1
cafe_df = pd.DataFrame(index=samples)
cafe_df['south-west'] = cafe_samples.sel(region='south-west').values
cafe_df['south-east'] = cafe_samples.sel(region='south-east').values
cafe_df['north-east'] = cafe_samples.sel(region='north-east').values

In [33]:
cafe_df

Unnamed: 0,south-west,south-east,north-east
1,378.860214,547.465443,582.065343
2,502.557845,503.186171,1096.213955
3,442.284121,370.629551,491.899484
4,449.104665,370.900393,696.271775
5,461.590521,358.333124,559.001504
...,...,...,...
25532,435.509655,347.268680,668.803591
25533,487.475433,369.739405,685.110427
25534,625.419925,439.606925,594.177701
25535,369.433097,593.544863,752.729511


In [34]:
cafe_terciles_df = pd.DataFrame(index=samples)
cafe_terciles_df['south-east'] = pd.qcut(cafe_df['south-east'], q=3, labels=['dry', 'normal', 'wet'])
cafe_terciles_df['south-west'] = pd.qcut(cafe_df['south-west'], q=3, labels=['dry', 'normal', 'wet'])
cafe_terciles_df['north-east'] = pd.qcut(cafe_df['north-east'], q=3, labels=['dry', 'normal', 'wet'])

In [35]:
cafe_terciles_df

Unnamed: 0,south-east,south-west,north-east
1,wet,normal,normal
2,wet,wet,wet
3,normal,wet,dry
4,normal,wet,wet
5,normal,wet,normal
...,...,...,...
25532,normal,wet,wet
25533,normal,wet,wet
25534,wet,wet,normal
25535,wet,normal,wet


In [36]:
cafe_terciles_pct = (cafe_terciles_df.groupby(['south-west', 'south-east', 'north-east']).size() / nsamples) * 100
cafe_terciles_pct.round(1)

south-west  south-east  north-east
dry         dry         dry           7.2
                        normal        4.0
                        wet           1.6
            normal      dry           3.2
                        normal        4.7
                        wet           3.5
            wet         dry           0.8
                        normal        2.7
                        wet           5.5
normal      dry         dry           6.4
                        normal        3.2
                        wet           1.2
            normal      dry           3.7
                        normal        4.7
                        wet           3.0
            wet         dry           1.1
                        normal        3.3
                        wet           6.7
wet         dry         dry           6.2
                        normal        2.7
                        wet           0.8
            normal      dry           3.6
                        normal        4.3

Expressed as a percentage of all years, wet-wet-wet and dry-dry-dry are the most common combinations in both the observations and model, but they are relatively more common in the observations.