### Connect to the running Dask cluster

In [18]:
from dask.distributed import Client

# Address of the Dask scheduler to attach to. This value is specific to one
# session; update it to match the scheduler of the cluster currently running.
SCHEDULER_ADDRESS = "tcp://10.32.5.46:44639"

client = Client(SCHEDULER_ADDRESS)
client  # bare expression: display the client summary as the cell output

0,1
Client  Scheduler: tcp://10.32.5.46:44639  Dashboard: /user/0000-0002-8701-4506/proxy/8787/status,Cluster  Workers: 4  Cores: 8  Memory: 46.00 GB


### Import required packages

In [19]:
import intake
import xarray as xr
from matplotlib import pyplot as plt
import numpy as np
import xgcm
import dask
import pprint
import gsw
import xesmf as xe
from xhistogram.xarray import histogram
from datetime import datetime

# local file for CMIP6 preprocessing
import preprocessing

### Locate UKESM1 ozone data

In [20]:
# JSON descriptor of the Pangeo CMIP6 intake-esm catalogue.
col_url = 'https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json'
col = intake.open_esm_datastore(col_url)

# Facets selecting ozone (o3) from the Amon table on the native grid for a
# single UKESM1-0-LL ensemble member, across the three experiments of interest.
o3_query = dict(
    institution_id='MOHC',
    source_id='UKESM1-0-LL',
    table_id='Amon',
    experiment_id=['historical', 'ssp126', 'ssp585'],
    variable_id=['o3'],
    member_id=['r1i1p1f2'],
    grid_label='gn',
)
cat = col.search(**o3_query)

# Show the matching catalogue rows (one per experiment is expected).
cat.df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year
0,CMIP,MOHC,UKESM1-0-LL,historical,r1i1p1f2,Amon,o3,gn,gs://cmip6/CMIP/MOHC/UKESM1-0-LL/historical/r1...,
1,ScenarioMIP,MOHC,UKESM1-0-LL,ssp126,r1i1p1f2,Amon,o3,gn,gs://cmip6/ScenarioMIP/MOHC/UKESM1-0-LL/ssp126...,
2,ScenarioMIP,MOHC,UKESM1-0-LL,ssp585,r1i1p1f2,Amon,o3,gn,gs://cmip6/ScenarioMIP/MOHC/UKESM1-0-LL/ssp585...,


In [21]:
# Options forwarded by intake-esm to the underlying readers. Times are decoded
# in both cases so that the later `time.year` grouping works.
# NOTE(review): cdf_kwargs presumably only applies to netCDF-backed assets; the
# catalogue rows found here point at zarr stores (zstore column) — confirm.
zarr_options = {'consolidated': True, 'decode_times': True}
netcdf_options = {'chunks': {}, 'decode_times': True}

# Open every matching store; the result is a dict of datasets keyed by
# 'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'.
dset_dict = cat.to_dataset_dict(zarr_kwargs=zarr_options,
                                cdf_kwargs=netcdf_options)
dset_dict.keys()

Progress: |███████████████████████████████████████████████████████████████████████████████| 100.0% 

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
             
--> There are 3 group(s)


dict_keys(['ScenarioMIP.MOHC.UKESM1-0-LL.ssp126.Amon.gn', 'CMIP.MOHC.UKESM1-0-LL.historical.Amon.gn', 'ScenarioMIP.MOHC.UKESM1-0-LL.ssp585.Amon.gn'])

### Preprocess the historical, ssp126, and ssp585 datasets

In [22]:
# Pull each experiment out of the dataset dictionary and pass it through the
# local CMIP6 preprocessing helper in one step. The key order below fixes
# which result is bound to which name.
historical, ssp126, ssp585 = (
    preprocessing.combined_preprocessing(dset_dict[key])
    for key in (
        'CMIP.MOHC.UKESM1-0-LL.historical.Amon.gn',
        'ScenarioMIP.MOHC.UKESM1-0-LL.ssp126.Amon.gn',
        'ScenarioMIP.MOHC.UKESM1-0-LL.ssp585.Amon.gn',
    )
)

### Drop the coordinates that are not needed

In [23]:
# Names removed from every dataset before further processing: the cell-bounds
# variables, the ensemble-member label and the bounds index.
# NOTE(review): recent xarray releases deprecate Dataset.drop in favour of
# drop_vars; kept as-is here because the installed xarray version is unknown.
# NOTE: this cell is not idempotent — re-running it after the names are gone
# will raise, so re-run from the preprocessing cell instead.
unused_names = ('lon_bnds','time_bounds','lat_bnds','member_id','bnds')

historical = historical.drop(unused_names)
ssp126 = ssp126.drop(unused_names)
ssp585 = ssp585.drop(unused_names)

### Calculate annual mean profiles

In [24]:
# Collapse the time axis to one value per calendar year. This is a plain
# (unweighted) mean over all time steps that fall in each year; the `time`
# dimension is replaced by a `year` dimension.
historical, ssp126, ssp585 = (
    ds.groupby('time.year').mean('time')
    for ds in (historical, ssp126, ssp585)
)

### Slice an 11-year window from each experiment (2004–2014 for historical, 2090–2100 for the SSPs; both ends inclusive)

In [25]:
# Keep the last years of each run (label-based slices include both end points)
# and drop any length-1 dimensions left over from earlier steps.
historical = historical.sel(year=slice(2004, 2014)).squeeze()
ssp126 = ssp126.sel(year=slice(2090, 2100)).squeeze()
ssp585 = ssp585.sel(year=slice(2090, 2100)).squeeze()

# Scale ozone by 1e6 so that it is expressed in ppmv.
# NOTE(review): assumes the catalogue's o3 is a dimensionless mole fraction
# (mol/mol) — confirm against the variable's original `units` attribute.
# NOTE: re-running this cell applies the factor again; re-run from the
# annual-mean cell if it needs to be repeated.
historical['o3'] = historical['o3'] * 1e6
ssp126['o3'] = ssp126['o3'] * 1e6
ssp585['o3'] = ssp585['o3'] * 1e6

### Prepare global attributes for the per-experiment and combined NetCDF files

In [26]:
# Global attributes shared by all three output datasets.
common_attrs = {
    'Prepared by': 'D. Jones',
    'Institute': 'British Antarctic Survey',
    'Units': 'ppmv',
    'Model Info': 'UK Earth System Model 1',
}

# Experiment-specific attributes are appended after the shared ones, so the
# key order in each file is: Prepared by, Institute, Units, Model Info,
# Description, Years covered.
historical.attrs.update({
    **common_attrs,
    'Description': 'Annual mean ozone profiles from historical experiment',
    'Years covered': '2004-2014',
})

ssp126.attrs.update({
    **common_attrs,
    'Description': 'Annual mean ozone profiles from ssp126 experiment (strong emissions reductions)',
    'Years covered': '2090-2100',
})

# ssp585 is the very-high-emissions scenario. The description previously
# repeated the ssp126 wording ("strong emissions reductions"), which mislabelled
# the data in the output file.
ssp585.attrs.update({
    **common_attrs,
    'Description': 'Annual mean ozone profiles from ssp585 experiment (very high emissions)',
    'Years covered': '2090-2100',
})


### Save results to NetCDF files

In [27]:
# Write one NetCDF file per experiment. Each dataset is loaded into memory
# (in place) immediately before it is written, in the order listed.
# NOTE: the ./data_out directory must already exist.
outputs = (
    (historical, './data_out/UKESM_O3_historical_v2.nc'),
    (ssp126, './data_out/UKESM_O3_ssp126_v2.nc'),
    (ssp585, './data_out/UKESM_O3_ssp585_v2.nc'),
)
for dataset, target_path in outputs:
    dataset.load()
    dataset.to_netcdf(path=target_path)

### Create merged dataset, save to single NetCDF file

In [28]:
# Tag every year with the experiment it came from. ssp126 and ssp585 cover the
# same years (2090-2100), so once the datasets are stacked along `year` their
# entries would otherwise carry identical labels and be indistinguishable.
# The tag is a non-index coordinate along `year`, so the existing variables and
# the `year` dimension are laid out exactly as before.
labelled = [
    ds.assign_coords(experiment=('year', [name] * ds['year'].size))
    for name, ds in (
        ('historical', historical),
        ('ssp126', ssp126),
        ('ssp585', ssp585),
    )
]
merged = xr.concat(labelled, dim='year')

# xr.concat keeps only the first dataset's global attributes, which would
# describe the merged file as "historical" only. Overwrite the two attributes
# that are experiment-specific so the file's metadata matches its content.
merged.attrs['Description'] = (
    'Annual mean ozone profiles from the historical, ssp126 and ssp585 '
    'experiments, concatenated along year (see the experiment coordinate)'
)
merged.attrs['Years covered'] = '2004-2014 (historical), 2090-2100 (ssp126, ssp585)'

merged.to_netcdf(path='./data_out/UKESM_O3_merged.nc')