In [1]:
import xarray as xr
from dask_jobqueue import PBSCluster
from dask.distributed import Client
import numpy as np
import matplotlib.pyplot as plt
import datetime
import flox
import flox.xarray
import pandas as pd
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter


In [2]:
# Start a dask.distributed client with one thread per worker; the persist()
# and parallel=True calls later in this notebook run through it.
# NOTE(review): no scheduler address is passed, so per the dask.distributed
# docs this should start a local cluster on the current node - confirm that
# is the intent, given PBSCluster is imported above.
client = Client(threads_per_worker=1)

2024-06-04 09:11:11,836 - distributed.preloading - INFO - Creating preload: /g/data/hh5/public/apps/dask-optimiser/schedplugin.py
2024-06-04 09:11:11,841 - distributed.utils - INFO - Reload module schedplugin from .py file
2024-06-04 09:11:11,849 - distributed.preloading - INFO - Import preload module: /g/data/hh5/public/apps/dask-optimiser/schedplugin.py


Modifying workers


In [5]:
# Function to calculate monthly climatology and convert to single precision
def monthly_climatology(ds, time_dim):
    """
    Calculate the monthly climatology for a given xarray object.

    Values are grouped by the calendar month of ``ds[time_dim]`` and averaged
    over all years with flox (``method='cohorts'``). The month labels are
    attached to a copy of the input, so the caller's object is not modified.

    Parameters:
        ds (xarray.Dataset or xarray.DataArray): The input data. It must have
            a datetime-like coordinate named ``time_dim`` (the ``.dt``
            accessor is used on it).
        time_dim (str): The name of the time dimension in ``ds``.

    Returns:
        xarray.Dataset or xarray.DataArray: Mean values for each calendar
        month across all years, indexed by ``month``. The data type of the
        returned values is float32.
    """
    # assign_coords returns a new object; the previous item assignment
    # (ds['month'] = ...) added 'month' to the object owned by the caller.
    labelled = ds.assign_coords(month=ds[time_dim].dt.month)

    climatology_cohorts = flox.xarray.xarray_reduce(
        labelled,
        'month',
        func='mean',
        method='cohorts',
    )
    return climatology_cohorts.astype(np.float32)

In [6]:
# Root directories of the three model runs: historical, future and 'pi'
# (presumably the pre-industrial control - confirm).
dir1 = '/scratch/xv83/rxm599/historical/'
dir2 = '/scratch/xv83/rxm599/future/'
dir0 = '/scratch/xv83/rxm599/pi/'

# Open every run with identical settings: all files matching files*.nc are
# combined into one dataset, chunked 10 steps at a time along TIME41.
dbgc1, dbgc2, dbgc0 = (
    xr.open_mfdataset(root + "files*.nc", parallel=True, chunks={'TIME41': 10})
    for root in (dir1, dir2, dir0)
)

In [7]:
# Analysis windows as (start date, end date) pairs, keyed by period label.
# Every window spans 20 calendar years. The GWxpy labels presumably denote
# global warming levels (e.g. GW1p5 -> 1.5 degrees) - confirm with the author.
GWL_periods = dict(
    current=('1995-01-01', '2014-12-31'),
    GW1p2=('2001-01-01', '2020-12-31'),
    GW1p5=('2015-01-01', '2034-12-31'),
    GW2p0=('2030-01-01', '2049-12-31'),
    GW3p0=('2053-01-01', '2072-12-31'),
    GW4p0=('2074-01-01', '2093-12-31'),
)

In [8]:
# Process and save climatology data
def process_climatology(ds, time_dim, start, end, variable, period,
                        out_dir='/g/data/ia39/ncra/ocean/peacey'):
    """
    Compute the monthly climatology of one variable over a period and save it.

    The variable is selected from ``ds``, restricted to ``start``..``end``
    along ``time_dim``, reduced with ``monthly_climatology`` and written to
    ``<out_dir>/<variable>_climatology_<period>.nc``.

    Parameters:
        ds (xarray.Dataset): Dataset containing ``variable``.
        time_dim (str): Name of the time dimension/coordinate in ``ds``.
        start: First time label of the period (passed to ``slice``).
        end: Last time label of the period (passed to ``slice``).
        variable (str): Name of the data variable to process.
        period (str): Label of the period; used only in the output file name.
        out_dir (str or os.PathLike): Directory the netCDF file is written to.
            Defaults to the location this notebook has always used.

    Returns:
        None. The result is written to disk.
    """
    import os  # local import: only needed to build the output path

    # Index by name instead of getattr(): attribute lookup finds Dataset
    # methods/properties first, so a variable called e.g. 'mean' or 'dims'
    # would not return the data variable.
    data = ds[variable].sel({time_dim: slice(start, end)}).persist()
    clim = monthly_climatology(data, time_dim).persist()

    file_path = os.path.join(out_dir, f'{variable}_climatology_{period}.nc')

    # Save as netCDF
    clim.to_netcdf(file_path, compute=True)

In [9]:
%%time 
# Climatologies for the 'current' window, computed from dbgc1: one output
# file per variable (OAR first, then PH).
for bgc_variable in ('OAR', 'PH'):
    process_climatology(dbgc1, 'TIME41', *GWL_periods['current'], bgc_variable, 'current')

In [10]:
%%time 
# Climatologies from dbgc0 over the same dates as the 'current' window,
# written under the period label 'PI' (OAR first, then PH).
for bgc_variable in ('OAR', 'PH'):
    process_climatology(dbgc0, 'TIME41', *GWL_periods['current'], bgc_variable, 'PI')

In [12]:
%%time 
# Climatologies for every window except 'current', computed from dbgc2.
# NOTE(review): a label-based slice only returns timesteps that exist in the
# dataset, so a window starting before dbgc2 does (check GW1p2, 2001-2020)
# would be averaged over fewer years without any error - confirm coverage.
for period, (start, end) in GWL_periods.items():
    if period != 'current':
        for bgc_variable in ('OAR', 'PH'):
            process_climatology(dbgc2, 'TIME41', start, end, bgc_variable, period)

CPU times: user 2min 36s, sys: 39.4 s, total: 3min 16s
Wall time: 7min 50s


In [13]:
# Surface-temperature inputs: one directory per run (historical, rcp8p5).
dir1_new = '/g/data/fp2/OFAM3/jra55_historical.1/surface/'
dir2_new = '/g/data/fp2/OFAM3/jra55_rcp8p5/surface/'

# Open each run as a single dataset, chunked 10 steps at a time along Time.
# NOTE(review): the two glob patterns differ ('ocean_temp_sfc_*' has no .nc
# suffix) - confirm the first one cannot pick up non-netCDF files.
dsst1 = xr.open_mfdataset(
    dir1_new + 'ocean_temp_sfc_*',
    parallel=True,
    chunks={'Time': 10},
)
dsst2 = xr.open_mfdataset(
    dir2_new + 'ocean_temp_sfc*.nc',
    parallel=True,
    chunks={'Time': 10},
)

# SST climatology for every window: 'current' is read from the historical
# dataset (dsst1), all other windows from the rcp8p5 dataset (dsst2).
for period, (start, end) in GWL_periods.items():
    sst_source = dsst1 if period == 'current' else dsst2
    process_climatology(sst_source, 'Time', start, end, 'temp', period)