# Compute Nino3.4 DJF index for each model, and save to file

In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
from scipy.signal import detrend
from matplotlib import pyplot as plt
from eofs.xarray import Eof
from scipy import signal
import pandas as pd
import xarray as xr
import xesmf as xe
import intake
import pprint 
import util 

# Open the CMIP6 intake-esm catalog: the glade catalog when running on an NCAR
# host, otherwise the pangeo catalog.
col = intake.open_esm_datastore(
    "../catalogs/glade-cmip6.json"
    if util.is_ncar_host()
    else "../catalogs/pangeo-cmip6.json"
)

In [2]:
# Only keep models whose piControl run spans at least this many years.
# Models with fewer years often missed future scenarios, so they are not so
# interesting for us.
minyrs_control = 496

# Table of available data, indexed by model name (first column of the file).
data_table = pd.read_table('available_data.txt', index_col=0)

# Names of the models that pass the piControl-length threshold.
models_used = data_table.loc[data_table['piControl (yrs)'] >= minyrs_control].index
print(models_used)

Index(['BCC-CSM2-MR', 'FGOALS-g3', 'CanESM5', 'CNRM-CM6-1', 'CNRM-ESM2-1',
       'E3SM-1-0', 'EC-Earth3', 'EC-Earth3-Veg', 'IPSL-CM6A-LR', 'MIROC-ES2L',
       'MIROC6', 'UKESM1-0-LL', 'MRI-ESM2-0', 'GISS-E2-1-G', 'GISS-E2-1-H',
       'CESM2', 'CESM2-WACCM', 'GFDL-CM4', 'SAM0-UNICON'],
      dtype='object')


In [3]:
# Select the model to process in this run (here: the first entry of models_used).
model = models_used[0]
model

'BCC-CSM2-MR'

In [4]:
# Show the table row (ensemble-member and year counts per experiment) of the selected model.
data_table.loc[model]

piControl (ens.mem.)         1
historical (ens.mem.)        3
ssp126 (ens.mem.)          NaN
ssp245 (ens.mem.)            1
ssp370 (ens.mem.)          NaN
ssp585 (ens.mem.)            1
abrupt-4xCO2 (ens.mem.)      1
piControl (yrs)            600
historical (yrs)           165
ssp126 (yrs)               NaN
ssp245 (yrs)                86
ssp370 (yrs)               NaN
ssp585 (yrs)                86
abrupt-4xCO2 (yrs)         151
Name: BCC-CSM2-MR, dtype: object

In [5]:
# Which of the experiments we want to study does this model provide?
# Only the first six entries of the model's table row are inspected; an experiment
# is kept when its entry is a positive number (nan entries fail the comparison).
# The experiment name is the row label without its last 11 characters.
exp_list = [
    label[:-11]
    for label, count in data_table.loc[model].iloc[:6].items()
    if float(count) > 0
]
print(exp_list)
    

['piControl', 'historical', 'ssp245', 'ssp585']


In [6]:
# For every experiment, search the catalog for variable 'ts' in table 'Amon'
# of the chosen model and open the matching datasets.
exp_keys = {}
datasets = {}

for exp in exp_list:
    print(exp)
    cat = col.search(experiment_id=exp, source_id=model, variable_id='ts', table_id='Amon')
    dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})

    # keep every returned dataset; exp_keys remembers the last key seen for this experiment
    for key, dset in dset_dict.items():
        exp_keys[exp] = key
        datasets[key] = dset

exp_keys

piControl

xarray will load netCDF datasets with dask using a single chunk for all arrays.
For effective chunking, please provide chunks in cdf_kwargs.
For example: cdf_kwargs={'chunks': {'time': 36}}

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
historical

xarray will load netCDF datasets with dask using a single chunk for all arrays.
For effective chunking, please provide chunks in cdf_kwargs.
For example: cdf_kwargs={'chunks': {'time': 36}}

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
ssp245

xarray will load netCDF datasets with dask using a single chunk for all arrays.
For effective chunking, please provide chunks in cdf_kwargs.
For example: cdf_kwargs={'chunks': {'time': 36}}

--> The keys in the returned

{'piControl': 'CMIP.BCC.BCC-CSM2-MR.piControl.Amon.gn',
 'historical': 'CMIP.BCC.BCC-CSM2-MR.historical.Amon.gn',
 'ssp245': 'ScenarioMIP.BCC.BCC-CSM2-MR.ssp245.Amon.gn',
 'ssp585': 'ScenarioMIP.BCC.BCC-CSM2-MR.ssp585.Amon.gn'}

In [7]:
def area_weights(lat_bnds, lon_bnds):
    """Relative grid-cell area weights per latitude band, for a perfectly spherical Earth.

    lat_bnds: 2-D array with one row per latitude band; column 0 is read as the
        lower and column 1 as the upper latitude bound, in degrees.
    lon_bnds: 2-D array of longitude bounds in degrees. Only the first row is
        used, i.e. all cells are taken to have the same longitude width.

    Returns an array of weights with the same length as the latitude dimension,
    scaled so that the mean weight is 1.
    """
    lat_lo = np.radians(lat_bnds[:, 0])
    lat_hi = np.radians(lat_bnds[:, 1])
    # longitude width of the first cell, applied to every cell
    lon_width = np.radians(np.diff(lon_bnds[0, :]))
    # on a sphere the cell area is proportional to d(lon) * (sin(lat_hi) - sin(lat_lo))
    band_area = lon_width * (np.sin(lat_hi) - np.sin(lat_lo))
    return band_area / band_area.mean()

In [8]:
# days per month (January first, December last) for each supported calendar:
dpm = {'noleap': [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]}

def day_weights(years, chosen_season = 'DJF', calendar = 'noleap'):
    """Monthly day-count weights, normalised to have mean 1 within one season.

    years: number of years; the 12 monthly weights are repeated this many times,
        starting with January.
    chosen_season: 'DJF', 'MAM', 'JJA', 'SON', or 'all' for the whole year.
    calendar: key into the module-level `dpm` table (only 'noleap' is defined).

    Returns a 1-D array of length 12*years. Only the weights of the months in
    the chosen season are meaningful: they average to 1 within the season.
    NB: does not care what numbers are produced for the other months.

    Raises ValueError for an unknown season (previously this surfaced as an
    unbound-variable error) and KeyError for an unknown calendar.
    """
    # zero-based month indices (0 = January) belonging to each season
    season_months = {
        'DJF': (11, 0, 1),
        'MAM': (2, 3, 4),
        'JJA': (5, 6, 7),
        'SON': (8, 9, 10),
        'all': tuple(range(12)),
    }
    if chosen_season not in season_months:
        raise ValueError(
            "unknown season %r; expected one of %s"
            % (chosen_season, sorted(season_months))
        )

    month_lengths = np.array(dpm[calendar])
    dpm_season = month_lengths[list(season_months[chosen_season])]

    # normalise weights within relevant season to have mean 1
    norm_season = np.mean(dpm_season)
    weights = np.tile(month_lengths, years)/norm_season
    return weights

In [9]:
# Load one dataset in order to check which calendar the model uses.
# If it is anything other than noleap, the day_weights function above must be changed.
exp = exp_list[1]
print(exp)
key = exp_keys[exp]
exp_datasets = datasets[key]
members_sorted = exp_datasets.member_id.sortby(exp_datasets.member_id)

# the first member (in sorted order) is enough for inspecting the time axis
ds = exp_datasets.sel(member_id=members_sorted[0])
ds.time

historical


<xarray.DataArray 'time' (time: 1980)>
array([cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, 3, 16),
       cftime.DatetimeNoLeap(1850, 2, 15, 0, 0, 0, 0, 5, 46),
       cftime.DatetimeNoLeap(1850, 3, 16, 12, 0, 0, 0, 6, 75), ...,
       cftime.DatetimeNoLeap(2014, 10, 16, 12, 0, 0, 0, 6, 289),
       cftime.DatetimeNoLeap(2014, 11, 16, 0, 0, 0, 0, 2, 320),
       cftime.DatetimeNoLeap(2014, 12, 16, 12, 0, 0, 0, 4, 350)], dtype=object)
Coordinates:
    member_id  <U8 'r1i1p1f1'
  * time       (time) object 1850-01-16 12:00:00 ... 2014-12-16 12:00:00
Attributes:
    bounds:         time_bnds
    axis:           T
    long_name:      time
    standard_name:  time

In [10]:
# Manual check of the time unit of the branch time, since it is not specified
# whether the unit is years or days: a value that is a whole multiple of 365 is
# taken to be in days, anything else is taken to be in years.
print(ds.branch_time_in_parent)
if ds.branch_time_in_parent % 365 != 0:
    print('branch time unit is likely years')
    branch_time_unit = 'years'
else:
    print('branch time unit is likely', ds.parent_time_units)
    branch_time_unit = 'days'

2439.0
branch time unit is likely years


In [12]:
# Nino3.4 region: 5 S - 5 N, 190 E - 240 E (= 170 W - 120 W)
latregion = slice(-5, 5)
lonregion = slice(190, 240)
# before regridding, use a larger region that adds 5 deg to each border:
larger_latregion = slice(-10, 10)
larger_lonregion = slice(185, 245)
resolution = 1

# target grid for the regridding: cell centres offset by half a cell from the region edges
ds_out = xr.Dataset(
    {
        'lon': (['lon'], np.arange(lonregion.start+resolution/2, lonregion.stop+resolution/2, resolution)),
        'lat': (['lat'], np.arange(latregion.start+resolution/2, latregion.stop+resolution/2, resolution)),
    }
)

# [lower, upper] bounds of each target cell, and the resulting area weights per latitude
regr_lat_bnds = np.array(
    [[lower, lower+resolution] for lower in range(latregion.start, latregion.stop)]
)
regr_lon_bnds = np.array(
    [[lower, lower+resolution] for lower in range(lonregion.start, lonregion.stop)]
)
area_w = area_weights(regr_lat_bnds, regr_lon_bnds)

season = 'DJF'
y_ext = 250  # years between 1850 to 2100
branchyears = {}  # branchyears[exp][member] -> branch year of that run
lastD = {}        # last (day-weighted) December value of each historical member

# Loop over all experiments and ensemble members of the chosen model, compute the
# seasonal-mean regional average of ts for each run, and collect the series as
# columns of the DataFrame df.
# NOTE(review): control_years, years and df are only created in the
# piControl / 'r1i1p1f1' branch further down, so that combination has to be the
# first one processed (exp_list must start with 'piControl') — confirm for other models.
for exp in exp_list:
    key = exp_keys[exp]
    exp_datasets = datasets[key]
    members_sorted = exp_datasets.member_id.sortby(exp_datasets.member_id)
    branchyears[exp] = {};
    for member in members_sorted.values:
        print(exp, member)
        ds = exp_datasets.sel(member_id = member)

        # branch year: the year at which this run starts, in piControl time units
        if ds.parent_experiment_id == 'historical':
            # this should always have unit years
            # the run branches from historical: start where the historical parent
            # member started, plus the number of historical years listed in data_table
            parent_branch_from_grandparent = branchyears['historical'][ds.parent_variant_label]
            child_branch_from_grandparent = parent_branch_from_grandparent + data_table.loc[model, 'historical (yrs)']
            branchyear = child_branch_from_grandparent
        else:
            # use the branch_time_in_parent attribute, interpreted with the unit
            # guessed in the earlier manual check (branch_time_unit)
            # NOTE(review): branchyear is left unset (or stale from the previous
            # iteration) if branch_time_unit is neither 'years' nor 'days'
            if branch_time_unit == 'years':
                branchyear = ds.branch_time_in_parent;
            elif branch_time_unit == 'days':
                branchyear = ds.branch_time_in_parent / 365;
        branchyears[exp][member] = branchyear   

        # select regional data, perform a regridding, and compute area average
        regional_data = ds.ts.sel(lat = larger_latregion, lon = larger_lonregion)
        # NOTE(review): with reuse_weights = True the log shows an existing weight file,
        # named only by the grid shapes, being reused — confirm that a stale file from a
        # different grid with the same shape cannot be picked up
        regridder = xe.Regridder(regional_data, ds_out, 'bilinear', reuse_weights = True)
        regridded_data = regridder(regional_data)
        # area_w has one entry per latitude, so lat is moved to the last axis for
        # broadcasting; the weights are normalised to mean 1 in area_weights, so the
        # plain mean over lon and lat acts as an area-weighted mean
        area_avg = (regridded_data.transpose('time', 'lon', 'lat') * area_w).mean(dim=['lon', 'lat'])

        # number of whole years in the monthly series; day_weights repeats
        # January..December, so this presumably relies on the series starting in
        # January — TODO confirm for every model
        yrs = int(area_avg.shape[0]/12)

        # average over season
        day_weighted_avg = area_avg*day_weights(yrs, chosen_season = season)
        ds_season = day_weighted_avg.where(day_weighted_avg['time.season'] == season) # creates nan in all other months

        if exp == 'historical':
            # save last december month for each member for use in season mean in first year of ssp exps
            lastD[member] = day_weighted_avg[-1]
        elif exp not in ['piControl','historical']: # then it must be future scenario   
            # prepend the last historical December, so the first season of the scenario is complete
            # NOTE(review): lastD is looked up with the scenario's own member id; this assumes
            # it equals the label of its historical parent (ds.parent_variant_label) — confirm
            ds_season = xr.concat([lastD[member], ds_season], dim = 'time')

        # centred 3-month mean; with min_periods=3 a value is only produced where all
        # three months are present, i.e. at the middle month of the season
        ds_season = ds_season.rolling(min_periods=3, center=True, time=3).mean()

        if exp not in ['piControl','historical']:
            # remove nan-value obtained from inserting last december month from historical
            ds_season = ds_season[1:]
        seasonmean = ds_season.groupby('time.year').mean('time') # make annual mean
        # no information the first year of piControl and historical, since we are missing the december month before

        # day-weighted rolling 3-months mean for all months (with seasonal variations)
        #day_weighted_avg_allyear = area_avg*day_weights(yrs, chosen_season = 'all')
        #smoothed_allyear = day_weighted_avg_allyear.rolling(min_periods=3, center=True, time=3).mean()

        # column label of this run: an (experiment, member) tuple
        colname = [(exp, member)]
        if exp == 'piControl' and member == 'r1i1p1f1':
            # create dataframe for storing all results and make the piControl years the index
            # extend years by y_ext years, in case historical branches in the end of piControl
            control_years = seasonmean.year.values
            years = list(control_years) + list(np.arange(control_years[-1]+1, control_years[-1] + y_ext+1))
            data = np.append(seasonmean.values, np.full(y_ext, np.nan)) # append nans
            df = pd.DataFrame(data, index = years, columns = colname)
        else:
            # pad with nans before the branch year (nans1) and after the end of the
            # run (nans2), so the column has the same length as the index
            # NOTE(review): np.full fails with a negative length if the run extends
            # beyond the y_ext extra years appended to the piControl index
            nans1 = np.full(int(branchyear - control_years[0]), np.nan);
            nans2 = np.full(len(years) - len(seasonmean.values) - len(nans1), np.nan)
            data = np.concatenate((nans1, seasonmean.values, nans2))

            df_col = pd.DataFrame(data, index = years, columns = colname)
            df = pd.merge(df, df_col, left_index=True, right_index=True, how='outer')
            # all experiments are stored in piControl time units
            # starting at their branchyear in piControl
            # all other values are set to be nan
        
# Turn the (experiment, member) tuples used as column labels into a two-level column index.
df.columns = pd.MultiIndex.from_tuples(df.columns, names=['Experiment','Member'])

# Remove all rows at the end containing only nan values.
# The length check ends the loop cleanly if every row is all-nan (df.iloc[-1]
# fails on an empty frame), and .iloc keeps the row slicing positional whatever
# the dtype of the year index is.
while len(df) > 0 and df.iloc[-1].isna().all():
    df = df.iloc[:-1]


piControl r1i1p1f1
Reuse existing file: bilinear_18x53_10x50.nc
historical r1i1p1f1
Reuse existing file: bilinear_18x53_10x50.nc
historical r2i1p1f1
Reuse existing file: bilinear_18x53_10x50.nc
historical r3i1p1f1
Reuse existing file: bilinear_18x53_10x50.nc
ssp245 r1i1p1f1
Reuse existing file: bilinear_18x53_10x50.nc
ssp585 r1i1p1f1
Reuse existing file: bilinear_18x53_10x50.nc


In [None]:
# Optional: uncomment to raise the number of rows pandas shows when displaying a table.
#pd.set_option('display.max_rows', 500)

In [13]:
# Display the result table: rows are years in piControl time units, columns are (experiment, member).
df

Experiment,piControl,historical,historical,historical,ssp245,ssp585
Member,r1i1p1f1,r1i1p1f1,r2i1p1f1,r3i1p1f1,r1i1p1f1,r1i1p1f1
1850,,,,,,
1851,300.445907,,,,,
1852,299.368412,,,,,
1853,298.780820,,,,,
1854,300.150619,,,,,
...,...,...,...,...,...,...
2685,,,,,301.273165,303.678246
2686,,,,,302.711977,302.845171
2687,,,,,301.885820,303.102484
2688,,,,,302.276828,303.891943


In [15]:
# Saving is currently disabled: uncomment to write the table to a tab-separated
# text file named after the model.
#df.to_csv('../Processed_data/Nino3_4_DJF/' + model + '_DJF_nino3_4index.txt', sep='\t')