In [None]:
import xarray as xr
import accessvis
import glob
import iris
from esmvalcore.preprocessor import regrid
from ncdata.iris_xarray import cubes_from_xarray
from scipy.ndimage import gaussian_filter

import seaborn as sns

import warnings
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import os
import xarray as xr
from tqdm import tqdm_notebook as tqdm
import dask.bag as dasb
from dask.diagnostics import ProgressBar
from scipy import signal
from scipy.stats import spearmanr, norm
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import xskillscore as xskill
import cmocean
import cartopy.feature as cfeature
import matplotlib.colors as mcolors

import sys
sys.path.append('/home/548/cxc548/lib/python/bom-climate-change-variability-and-extreme-toolbox')

# Load data and Calculate Ensemble mean

In [2]:
from esmvalcore.dataset import Dataset

In [3]:
# Surface temperature ('ts', Amon table) from the CMIP6 historical experiment
# for CESM2. Facets set to '*' are wildcards: they match whatever is available
# instead of one explicitly named value.
# NOTE(review): with wildcard facets, confirm that load() returns the intended
# ensemble member(s) — presumably Dataset.from_files() is the way to expand
# wildcards into one dataset per match; verify against the ESMValCore docs.
var_cesm2 = Dataset(
    short_name='ts',
    mip='Amon',
    project='CMIP6',
    activity='CMIP',
    dataset='CESM2',  # model name; change this to load a different model
    ensemble='*',     # any ensemble member that can be found
    institute='*',
    grid='*',
    exp='historical',
    timerange='*',
)
cube_cesm2 = var_cesm2.load()

In [3]:
# Show the first coordinate of the CESM2 cube (per the printed output, the time axis).
print(cube_cesm2.coords()[0])

DimCoord :  time / (days since 1850-1-1 00:00:00, 365_day calendar)
    points: [
        1850-01-15 12:00:00, 1850-02-14 00:00:00, ...,
        2014-11-15 00:00:00, 2014-12-15 12:00:00]
    bounds: [
        [1850-01-01 00:00:00, 1850-02-01 00:00:00],
        [1850-02-01 00:00:00, 1850-03-01 00:00:00],
        ...,
        [2014-11-01 00:00:00, 2014-12-01 00:00:00],
        [2014-12-01 00:00:00, 2015-01-01 00:00:00]]
    shape: (1980,)  bounds(1980, 2)
    dtype: float64
    standard_name: 'time'
    var_name: 'time'
    attributes:
        title  'time'
        type   'double'


In [4]:
# Surface temperature ('ts', Amon table) from the CMIP6 historical experiment
# for CanESM5. Facets set to '*' are wildcards: they match whatever is
# available instead of one explicitly named value.
var_canesm5 = Dataset(
    short_name='ts',
    mip='Amon',
    project='CMIP6',
    activity='CMIP',
    dataset='CanESM5',  # model name; change this to load a different model
    ensemble='*',       # any ensemble member that can be found
    institute='*',
    grid='*',
    exp='historical',
    timerange='*',
)
cube_canesm5 = var_canesm5.load()

In [5]:
# cube_canesm5

In [6]:
# Surface temperature ('ts', Amon table) from the CMIP6 historical experiment
# for MIROC6. Facets set to '*' are wildcards: they match whatever is
# available instead of one explicitly named value.
var_miroc6 = Dataset(
    short_name='ts',
    mip='Amon',
    project='CMIP6',
    activity='CMIP',
    dataset='MIROC6',  # model name; change this to load a different model
    ensemble='*',      # any ensemble member that can be found
    institute='*',
    grid='*',
    exp='historical',
    timerange='*',
)
cube_miroc6 = var_miroc6.load()

In [7]:
# cube_miroc6

In [8]:
# Surface temperature ('ts', Amon table) from the CMIP6 historical experiment
# for NorESM2-LM. Facets set to '*' are wildcards: they match whatever is
# available instead of one explicitly named value.
var_noresm2 = Dataset(
    short_name='ts',
    mip='Amon',
    project='CMIP6',
    activity='CMIP',
    dataset='NorESM2-LM',  # model name; change this to load a different model
    ensemble='*',          # any ensemble member that can be found
    institute='*',
    grid='*',
    exp='historical',
    timerange='*',
)
cube_noresm2 = var_noresm2.load()

In [9]:
# cube_noresm2

In [10]:
# Surface temperature ('ts', Amon table) from the CMIP6 historical experiment
# for ACCESS-ESM1-5. Facets set to '*' are wildcards: they match whatever is
# available instead of one explicitly named value.
var_esm1_5 = Dataset(
    short_name='ts',
    mip='Amon',
    project='CMIP6',
    activity='CMIP',
    dataset='ACCESS-ESM1-5',  # model name; change this to load a different model
    ensemble='*',             # any ensemble member that can be found
    institute='*',
    grid='*',
    exp='historical',
    timerange='*',
)
cube_esm1_5 = var_esm1_5.load()

In [11]:
# cube_esm1_5

### ScenarioMIP

In [None]:
# Surface temperature ('ts', Amon table) for ACCESS-ESM1-5 from the ScenarioMIP
# activity, experiment ssp370. Facets set to '*' are wildcards: they match
# whatever is available instead of one explicitly named value.
sce_esm1_5 = Dataset(
    short_name='ts',
    mip='Amon',
    project='CMIP6',
    activity='ScenarioMIP',
    dataset='ACCESS-ESM1-5',  # model name; change this to load a different model
    ensemble='*',             # any ensemble member that can be found
    institute='*',
    grid='*',
    exp='ssp370',
    timerange='*',
)
cube_sce_esm1_5 = sce_esm1_5.load()

### Dask client

In [20]:
# NOTE: in this notebook the alias `dask` refers to `dask.distributed`,
# not to the top-level `dask` package.
import dask.distributed as dask

# Alternative with an explicit worker count and per-worker memory limit:
# client = dask.Client(threads_per_worker=1,n_workers=4, memory_limit='64GB')
# Start a client with one thread per worker; the number of workers and their
# memory limit are left to the library defaults.
client = dask.Client(threads_per_worker=1)
# Last expression of the cell: renders the client/cluster summary.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 2
Total threads: 2,Total memory: 9.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:38073,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B

0,1
Comm: tcp://127.0.0.1:42953,Total threads: 1
Dashboard: /proxy/35853/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:43735,
Local directory: /jobfs/155854705.gadi-pbs/dask-scratch-space/worker-g2d9qkqj,Local directory: /jobfs/155854705.gadi-pbs/dask-scratch-space/worker-g2d9qkqj

0,1
Comm: tcp://127.0.0.1:44871,Total threads: 1
Dashboard: /proxy/40479/status,Memory: 4.50 GiB
Nanny: tcp://127.0.0.1:38239,
Local directory: /jobfs/155854705.gadi-pbs/dask-scratch-space/worker-3q85op20,Local directory: /jobfs/155854705.gadi-pbs/dask-scratch-space/worker-3q85op20


In [None]:
## Helper functions: rolling trends, regridding and anomaly pre-processing

In [4]:
def calc_trend(xarr, start, end, length):
    """Compute linear trends (per decade) over sliding multi-year windows.

    For every window start year ``i`` in ``range(start, end)`` the values from
    year ``i`` to year ``i + length`` are selected and a first-degree
    polynomial is fitted along ``year``. The slope is multiplied by 10 so the
    result is expressed per decade.

    NOTE: label-based slicing is inclusive at both ends, so each window holds
    ``length + 1`` annual values when every year is present (e.g. 32 values
    for ``length=31``). This matches the original behaviour and is kept as is.

    Parameters
    ----------
    xarr : xarray.DataArray
        Annual data with an integer-valued ``year`` coordinate, such as the
        result of ``groupby('time.year').mean('time')``.
    start : int
        First window start year (inclusive).
    end : int
        Upper bound of the window start years (exclusive).
    length : int
        Offset, in years, between the first and last year of each window.

    Returns
    -------
    xarray.DataArray
        Trend per decade, with a ``year`` dimension labelled by the start
        year of each window.

    Raises
    ------
    ValueError
        If ``start >= end``, i.e. there is no window to compute.
    """
    start_years = np.arange(start, end, 1)
    if start_years.size == 0:
        raise ValueError(
            f"calc_trend: no windows to compute (start={start}, end={end})"
        )

    trends = []
    for startyear in start_years:
        endyear = startyear + length
        # Select with integer labels: the ``year`` coordinate is integer-valued,
        # so string labels only worked through implicit type coercion.
        arr = xarr.sel(year=slice(int(startyear), int(endyear)))

        # Relabel the window as 0..n-1 so the fitted slope is per time step
        # (one year) regardless of the absolute year values.
        arr = arr.assign_coords(year=np.arange(arr.sizes['year']))

        # polyfit orders the ``degree`` coordinate from highest to lowest, so
        # position 0 is the linear coefficient (the slope). Multiply by 10 to
        # get units of trend per decade.
        coeffs = arr.polyfit(dim='year', deg=1, skipna=True).polyfit_coefficients
        trends.append(coeffs.isel(degree=0) * 10)

    # Concatenate once instead of growing the array inside the loop; this also
    # creates the ``year`` dimension when there is only a single window.
    trnarr = xr.concat(trends, dim='year')
    trnarr['year'] = start_years

    return trnarr

In [5]:
def regrid_dataset(data):
    """Regrid the first cube derived from ``data`` onto a "1x1" target grid.

    ``data`` is converted to iris cubes with ``cubes_from_xarray``; only the
    first cube is regridded (linear scheme) and it is returned as an
    ``xarray.DataArray``.
    """
    first_cube = cubes_from_xarray(data)[0]
    return xr.DataArray.from_iris(
        regrid(first_cube, target_grid="1x1", scheme="linear")
    )

In [6]:
def pre_processing_trend_write(cube, dataset_name, output_path=None, start_year=1850, end_year=2014, window_lengths=(31, 51)):
    """Compute sliding-window trends of annual means and write them to NetCDF.

    The cube is converted to an ``xarray.DataArray``, averaged per calendar
    year, and ``calc_trend`` is run once per entry of ``window_lengths``.
    Each result is written to
    ``<output_path>/<dataset_name>_trend/<dataset_name>_ts_<N>_years_trend_<start_year>_<end_year - N>.nc``.

    Parameters
    ----------
    cube : iris cube
        Input data with a ``time`` coordinate.
    dataset_name : str
        Name used for the output sub-directory and file names.
    output_path : str or path-like, optional
        Base output directory. ``None`` means the current working directory
        (previously ``None`` was formatted into a literal ``"None"`` folder).
    start_year : int
        First window start year.
    end_year : int
        Last year of the period; window start years run up to (but excluding)
        ``end_year - N`` for a window length ``N``.
    window_lengths : iterable of int
        Window lengths in years. Defaults to the original 31 and 51.
    """
    base_dir = "." if output_path is None else output_path
    data = xr.DataArray.from_iris(cube)

    # Annual means: one value per calendar year, on a ``year`` coordinate.
    ds_model_ann = data.groupby('time.year').mean('time')

    trend_dir = os.path.join(base_dir, f"{dataset_name}_trend")
    os.makedirs(trend_dir, exist_ok=True)

    for length in window_lengths:
        last_start = end_year - length
        trend = calc_trend(ds_model_ann, start_year, last_start, length)
        trend.to_netcdf(
            os.path.join(
                trend_dir,
                f"{dataset_name}_ts_{length}_years_trend_{start_year}_{last_start}.nc",
            )
        )

In [7]:
def pre_processing_SSTA_annually_rolling_write(cube, dataset_name, output_path=None, start_year='1850', end_year='1900'):
    """Write 12-step rolling-mean anomalies relative to a baseline climatology.

    The cube is converted to an ``xarray.DataArray``. A month-of-year
    climatology over ``start_year``..``end_year`` is subtracted from every
    time step, and a centred rolling mean over 12 time steps is applied. The
    result is written to
    ``<output_path>/<dataset_name>/<dataset_name>_rolling_SSTA.nc``.

    Parameters
    ----------
    cube : iris cube
        Input data with a ``time`` coordinate.
    dataset_name : str
        Name used for the output sub-directory and file name.
    output_path : str or path-like, optional
        Base output directory. ``None`` means the current working directory
        (previously ``None`` was formatted into a literal ``"None"`` folder).
    start_year, end_year : str
        Bounds of the baseline period used for the climatology. The defaults
        reproduce the original hard-coded 1850-1900 baseline.

    Raises
    ------
    ValueError
        If the baseline period selects no time steps from the data.
    """
    base_dir = "." if output_path is None else output_path
    data = xr.DataArray.from_iris(cube)

    ds_model = data.unify_chunks()

    baseline = ds_model.sel(time=slice(start_year, end_year))
    if baseline.sizes['time'] == 0:
        raise ValueError(
            f"Baseline period {start_year}-{end_year} selects no data for {dataset_name}"
        )
    climatology = baseline.groupby('time.month').mean('time')

    ds_model_anoms = ds_model.groupby('time.month') - climatology
    ds_model_anoms_rolling = ds_model_anoms.rolling(time=12, center=True).mean()

    out_dir = os.path.join(base_dir, dataset_name)
    os.makedirs(out_dir, exist_ok=True)

    ds_model_anoms_rolling.to_netcdf(
        os.path.join(out_dir, f"{dataset_name}_rolling_SSTA.nc")
    )

In [29]:
# Sliding-window trends for the ACCESS-ESM1-5 historical cube (default period).
pre_processing_trend_write(
    cube_esm1_5,
    "ACCESS_ESM1_5",
    output_path="/g/data/kj13/datasets/visualisation_projects/uncharted_future/trend",
)

In [33]:
# Rolling-mean anomalies for the ACCESS-ESM1-5 historical cube.
pre_processing_SSTA_annually_rolling_write(
    cube_esm1_5,
    "ACCESS_ESM1_5",
    output_path="/g/data/kj13/datasets/visualisation_projects/uncharted_future/Rolling_SSTA/",
)

In [48]:
# Write rolling-mean anomalies for the remaining CMIP6 models; each model gets
# its own sub-directory (named by the mapping key) under `path`.
path = "/g/data/kj13/datasets/visualisation_projects/uncharted_future/Rolling_SSTA/"
dataset_list = dict(
    CESM2=cube_cesm2,
    NORESM2=cube_noresm2,
    MIROC6=cube_miroc6,
    CANESM5=cube_canesm5,
)
for dataset in dataset_list:
    cube = dataset_list[dataset]
    pre_processing_SSTA_annually_rolling_write(cube, dataset, output_path=path)

cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]


In [18]:
# Write sliding-window trends for the remaining CMIP6 models; each model gets
# its own "<name>_trend" sub-directory under `path`.
path = "/g/data/kj13/datasets/visualisation_projects/uncharted_future/trend"
dataset_list = dict(
    CESM2=cube_cesm2,
    NORESM2=cube_noresm2,
    MIROC6=cube_miroc6,
    CANESM5=cube_canesm5,
)
for dataset in dataset_list:
    cube = dataset_list[dataset]
    pre_processing_trend_write(cube, dataset, output_path=path)

cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]
cannot be safely cast to variable data type
  var = variable[keys]


### ScenarioMIP Trend

In [None]:
path="/g/data/kj13/datasets/visualisation_projects/uncharted_future/trend/future"
# The function defaults (start_year=1850, end_year=2014) describe the
# historical period. For the ssp370 cube they would select no data and put
# historical years in the output file names, so pass the scenario period.
# NOTE(review): 2015-2100 assumes the standard CMIP6 ScenarioMIP period —
# confirm against the time coordinate of cube_sce_esm1_5.
pre_processing_trend_write(
    cube_sce_esm1_5,
    "ACCESS_ESM1_5",
    output_path=path,
    start_year=2015,
    end_year=2100,
)

# HadISST

In [18]:
# Load the HadISST observational file; later cells take element [0] of the
# result to get the cube itself.
cube_hadisst = iris.load(
    "/g/data/ct11/access-nri/replicas/esmvaltool/obsdata-v2/Tier2/HadISST/OBS_HadISST_reanaly_1_Amon_ts_187001-202112.nc"
)

In [19]:
# Display the summary of the first cube loaded from the HadISST file.
cube_hadisst[0]

Surface Temperature (K),time,latitude,longitude
Shape,1824,180,360
Dimension coordinates,,,
time,x,-,-
latitude,-,x,-
longitude,-,-,x
Cell methods,,,
0,time: mean,time: mean,time: mean
Attributes,,,
comment,"'""""skin"""" temperature (i.e., SST for open ocean)'","'""""skin"""" temperature (i.e., SST for open ocean)'","'""""skin"""" temperature (i.e., SST for open ocean)'"
conventions,'CF/CMOR','CF/CMOR','CF/CMOR'


In [22]:
# Convert the HadISST cube to an xarray DataArray and display its time
# coordinate to check the period covered.
data=xr.DataArray.from_iris(cube_hadisst[0])
data.time

In [12]:
# Rolling-mean anomalies for HadISST, written next to the model outputs.
path = "/g/data/kj13/datasets/visualisation_projects/uncharted_future/Rolling_SSTA/"
pre_processing_SSTA_annually_rolling_write(
    cube_hadisst[0],
    "HadISST",
    output_path=path,
)

# ERA5-025

In [29]:
# `glob` is already imported in the notebook's import cell, so it is not
# re-imported here. The matches are sorted because glob returns them in
# arbitrary order, which would make the list differ between runs.
files_era5 = sorted(glob.glob("/g/data/rt52/era5/single-levels/monthly-averaged/sst/*/*"))

In [39]:
# Load every monthly-averaged ERA5 SST file matched by the wildcard pattern.
cube_era5 = iris.load("/g/data/rt52/era5/single-levels/monthly-averaged/sst/*/*")

In [40]:
import iris.util

# Remove the attributes that differ between the per-file cubes (done in
# place) so they can be merged into one cube along time.
iris.util.equalise_attributes(cube_era5)

cube_era5 = cube_era5.concatenate_cube()
# Drop 2025 by date rather than by position: the original `[:-8]` only removed
# 2025 when exactly eight 2025 months were present in the archive.
cube_era5 = cube_era5.extract(
    iris.Constraint(time=lambda cell: cell.point.year < 2025)
)

In [43]:
def pre_processing_SSTA_annually_rolling_write_ear5_monthly(cube, dataset_name, output_path=None, start_year="1850", end_year="1900"):
    """Write 12-step rolling-mean anomalies relative to a baseline climatology.

    Same processing as ``pre_processing_SSTA_annually_rolling_write`` but with
    a configurable baseline period and an extra rechunk along ``time`` before
    the rolling mean. The result is written to
    ``<output_path>/<dataset_name>/<dataset_name>_rolling_SSTA.nc``.

    Parameters
    ----------
    cube : iris cube
        Input data with a ``time`` coordinate.
    dataset_name : str
        Name used for the output sub-directory and file name.
    output_path : str or path-like, optional
        Base output directory. ``None`` means the current working directory
        (previously ``None`` was formatted into a literal ``"None"`` folder).
    start_year, end_year : str
        Bounds of the baseline period used for the month-of-year climatology.

    Raises
    ------
    ValueError
        If the baseline period selects no time steps from the data.
    """
    base_dir = "." if output_path is None else output_path
    data = xr.DataArray.from_iris(cube)

    ds_model = data.unify_chunks()

    baseline = ds_model.sel(time=slice(start_year, end_year))
    if baseline.sizes['time'] == 0:
        raise ValueError(
            f"Baseline period {start_year}-{end_year} selects no data for {dataset_name}"
        )
    climatology = baseline.groupby('time.month').mean('time')
    ds_model_anoms = ds_model.groupby('time.month') - climatology

    # Put the whole time axis into a single chunk before the rolling mean.
    # NOTE(review): presumably needed because the groupby arithmetic leaves
    # chunks along time that are too small for the 12-step window — confirm.
    ds_model_anoms = ds_model_anoms.chunk({'time': -1})
    ds_model_anoms_rolling = ds_model_anoms.rolling(time=12, center=True).mean()

    out_dir = os.path.join(base_dir, dataset_name)
    os.makedirs(out_dir, exist_ok=True)

    ds_model_anoms_rolling.to_netcdf(
        os.path.join(out_dir, f"{dataset_name}_rolling_SSTA.nc")
    )

In [44]:
# Rolling-mean anomalies for ERA5, using 1959-2009 as the baseline period.
path = "/g/data/kj13/datasets/visualisation_projects/uncharted_future/Rolling_SSTA/"
pre_processing_SSTA_annually_rolling_write_ear5_monthly(
    cube_era5,
    "ERA5-025",
    output_path=path,
    start_year="1959",
    end_year="2009",
)