# Automatic global averaging for each model

### to be used if all global averages have to be redone

In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
from scipy.signal import detrend
from matplotlib import pyplot as plt
from scipy import signal
import pandas as pd
import xarray as xr
import intake
import pprint
import util 
import os

# JSON description of the CMIP6 collection hosted on Google Cloud Storage
col_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
# open the collection as an intake-esm datastore; `col` is queried with
# col.search(...) in the cells below
col = intake.open_esm_datastore(col_url)


In [17]:
# load functions:
def area_weights(lat_bnds, lon_bnds):
    """Return normalised area weights per latitude band on a spherical earth.

    Parameters
    ----------
    lat_bnds : array indexed as [band, 0] (lower edge) and [band, 1] (upper
        edge), in degrees.
    lon_bnds : array whose first row holds the longitude edges of one cell, in
        degrees. Only that first row is used, i.e. all cells are taken to have
        the same longitudinal width.

    Returns
    -------
    Array with one weight per latitude band, scaled so the weights average
    to 1.
    """
    south_edge = np.radians(lat_bnds[:, 0])
    north_edge = np.radians(lat_bnds[:, 1])
    # width of a single cell in longitude (radians), taken from the first cell
    lon_width = np.radians(np.diff(lon_bnds[0, :]))
    # area of a lat/lon cell on the unit sphere: dlon * (sin(lat_n) - sin(lat_s))
    band_area = lon_width * (np.sin(north_edge) - np.sin(south_edge))
    return band_area / band_area.mean()

# function copied from: http://xarray.pydata.org/en/stable/examples/monthly-means.html
def leap_year(year, calendar='standard'):
    """Determine if `year` is a leap year in the given calendar.

    Adapted from the xarray monthly-means example, with the century rule of
    the mixed 'standard'/'gregorian' calendar corrected: the original test
    applied the Gregorian century exception to years *before* 1583 instead of
    from 1583 onwards, so e.g. 1900 and 2100 were reported as leap years.

    Parameters
    ----------
    year : int
    calendar : str
        'standard' / 'gregorian' (Julian rule up to 1582, Gregorian rule from
        1583), 'proleptic_gregorian' (Gregorian rule for all years) or
        'julian' (every fourth year). Any other calendar name never has leap
        years as far as this function is concerned.

    Returns
    -------
    bool
    """
    if calendar not in ('standard', 'gregorian', 'proleptic_gregorian', 'julian'):
        return False
    if year % 4 != 0:
        return False
    if calendar == 'julian':
        # Julian rule: every year divisible by 4 is a leap year
        return True

    # Gregorian rule: century years are leap years only if divisible by 400
    century_exception = (year % 100 == 0) and (year % 400 != 0)
    if calendar == 'proleptic_gregorian':
        return not century_exception
    # mixed calendar: the Gregorian exception only applies after the 1582 switch
    return not (century_exception and year >= 1583)

# function copied from: http://xarray.pydata.org/en/stable/examples/monthly-means.html
def get_dpm(time, calendar='standard'):
    """Return an integer array with the number of days in each month of `time`.

    Parameters
    ----------
    time : index-like object exposing `.month` and `.year` sequences of equal
        length (e.g. the result of `ds.time.to_index()`).
    calendar : str
        Key into the module-level `dpm` table; also passed to `leap_year` to
        decide whether February gets an extra day.

    Returns
    -------
    numpy.ndarray of int, one entry per element of `time`.

    Raises
    ------
    KeyError
        If `calendar` has no entry in `dpm`.
    """
    cal_days = dpm[calendar]
    # builtin int replaces the np.int alias, which newer NumPy no longer provides
    month_length = np.zeros(len(time), dtype=int)

    for i, (month, year) in enumerate(zip(time.month, time.year)):
        month_length[i] = cal_days[month]
        if month == 2 and leap_year(year, calendar=calendar):
            month_length[i] += 1
    return month_length

# Days per month for each supported calendar name. Index 0 is a placeholder so
# that the month number (1-12) can be used directly as the list index.
# February holds the non-leap length for calendars whose leap years are added
# by leap_year(); 'all_leap'/'366_day' carry the 29 days directly.
_DAYS_365 = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
_DAYS_366 = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
_DAYS_360 = [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]

dpm = {'noleap': list(_DAYS_365),
       '365_day': list(_DAYS_365),
       'standard': list(_DAYS_365),  # default calendar of get_dpm / leap_year
       'gregorian': list(_DAYS_365),
       'julian': list(_DAYS_365),  # same month lengths; only the leap-year rule differs
       'proleptic_gregorian': list(_DAYS_365),
       'all_leap': list(_DAYS_366),
       '366_day': list(_DAYS_366),
       '360_day': list(_DAYS_360)
      }

def compute_day_weights(ds, calendar='noleap', first_month=1):
    """Return per-month weights proportional to the number of days in the month.

    The weights are normalised within each block of 12 months so that they
    average to 1 over that block; multiplying monthly values by them and
    taking the plain 12-month mean gives a day-weighted annual mean.

    Parameters
    ----------
    ds : dataset with a `time` coordinate of monthly time stamps.
    calendar : str
        Calendar name passed on to `get_dpm`. (Previously this argument was
        ignored and the global `ds_calendar` was read instead.)
    first_month : int
        Month number of the first time stamp. For 1 the months are grouped by
        calendar year; otherwise by consecutive 12-month blocks counted from
        the start of the series.

    Returns
    -------
    xarray.DataArray named 'month_length' on the `time` coordinate of `ds`.
    """
    month_length = xr.DataArray(
        get_dpm(ds.time.to_index(), calendar=calendar),
        coords=[ds.time],
        name='month_length',
    )

    ##### This code is only tested for noleap so far #####
    if first_month == 1:
        # mean month length of every calendar year
        annual_means = month_length.groupby('time.year').mean('time').values
    else:
        # mean month length of every consecutive 12-month block
        n_years = len(month_length) // 12
        annual_means = np.array([month_length[i*12:(i+1)*12].mean() for i in range(n_years)])

    # repeat each annual mean 12 times so it lines up with the monthly series
    norm_by_monthly = np.concatenate([np.tile(annual_mean, 12) for annual_mean in annual_means])

    # normalized to have mean 1
    return month_length / norm_by_monthly

def calendar_check(model):
    """Return the calendar name listed for `model` in the hand-made table below.

    The table records the time format of the piControl run of each model, as
    found from a manual check. For a few models a note is printed because the
    calendar of other experiments, or of the model itself, is uncertain.

    Parameters
    ----------
    model : str
        Model name (CMIP6 source_id).

    Returns
    -------
    str : one of 'noleap', 'gregorian', 'proleptic_gregorian', '360_day',
        'julian'.

    Raises
    ------
    ValueError
        If `model` is not in the table (previously this surfaced as an
        UnboundLocalError on the return statement).
    """
    # Time formats for piControl, found from manual check:
    if model in ['TaiESM1', 'BCC-CSM2-MR', 'BCC-ESM1', 'CAMS-CSM1-0', 'CAS-ESM2-0', 'FGOALS-f3-L', 'FGOALS-g3', 'CanESM5', 'CanESM5-CanOE', 'E3SM-1-0', 'E3SM-1-1', 'E3SM-1-1-ECA', 'FIO-ESM-2-0', 'INM-CM4-8', 'INM-CM5-0', 'GISS-E2-1-G', 'GISS-E2-1-G-CC', 'GISS-E2-1-H', 'GISS-E2-2-G', 'CESM2', 'CESM2-FV2', 'CESM2-WACCM', 'CESM2-WACCM-FV2', 'NorCPM1', 'NorESM1-F', 'NorESM2-LM', 'NorESM2-MM', 'GFDL-CM4', 'SAM0-UNICON', 'GFDL-ESM4', 'CIESM', 'MCM-UA-1-0']:
        return 'noleap'
    if model in ['EC-Earth3', 'CNRM-CM6-1', 'IPSL-CM6A-LR', 'MIROC-ES2L', 'MIROC6', 'NESM3']: # 'IPSL-CM6A-LR':'piClim-4xCO2','piClim-control' says noleap calendar
        return 'gregorian'
    if model in ['AWI-CM-1-1-MR', 'AWI-ESM-1-1-LR', 'EC-Earth3-Veg', 'EC-Earth3-Veg-LR', 'ACCESS-ESM1-5', 'ACCESS-CM2', 'MPI-ESM-1-2-HAM', 'MPI-ESM1-2-LR', 'MPI-ESM1-2-HR', 'EC-Earth3-LR']:
        return 'proleptic_gregorian'
    if model in ['UKESM1-0-LL', 'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'CNRM-ESM2-1', 'KACE-1-0-G', 'MRI-ESM2-0']:
        if model in ['CNRM-ESM2-1', 'MRI-ESM2-0']:
            print('piControl is 360_day, the other experiments unknown')
        return '360_day'
    if model in ['IITM-ESM']:
        return 'julian'
    if model in ['CNRM-CM6-1-HR']:
        # 'EC-Earth3' and 'EC-Earth3-LR' used to be listed here as well, but
        # they are already matched by the branches above and never got here.
        print('not 100% sure what calendar this model has, but a guess is made based on other models from same institution')
        print('calendar is likely gregorian')
        return 'gregorian'
    raise ValueError('no calendar is listed for model ' + repr(model))

def exp_list(model):
    """Return the list of experiment names to process for `model`.

    The table below is hard-coded; the first entry whose model list contains
    `model` wins.

    Parameters
    ----------
    model : str
        Model name (CMIP6 source_id).

    Returns
    -------
    list of str : experiment ids, a new list on every call.

    Raises
    ------
    ValueError
        If `model` is not in the table (previously this surfaced as an
        UnboundLocalError on the return statement).
    """
    experiments_by_models = (
        (['ACCESS-CM2', 'ACCESS-ESM1-5', 'GFDL-ESM4', 'MPI-ESM1-2-LR', 'NorESM2-MM', 'UKESM1-0-LL'],
         ['piControl', 'abrupt-4xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'piClim-4xCO2', 'piClim-control']),
        (['CNRM-ESM2-1'],
         ['piControl', 'abrupt-4xCO2', 'historical', 'ssp245', 'ssp370', 'ssp585', 'piClim-4xCO2', 'piClim-control']),
        (['AWI-CM-1-1-MR', 'BCC-CSM2-MR', 'CAMS-CSM1-0', 'CNRM-CM6-1-HR', 'INM-CM4-8', 'INM-CM5-0', 'MIROC-ES2L', 'MPI-ESM1-2-HR'],
         ['piControl', 'abrupt-4xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585']),
        (['AWI-ESM-1-1-LR', 'E3SM-1-1', 'E3SM-1-1-ECA', 'GISS-E2-1-G-CC', 'MPI-ESM-1-2-HAM', 'NorCPM1'],
         ['piControl', 'historical']),
        (['BCC-ESM1'],
         ['piControl', 'abrupt-4xCO2', 'historical', 'ssp370', 'piClim-control']),
        (['CESM2', 'CNRM-CM6-1', 'MRI-ESM2-0'],
         ['piControl', 'abrupt-4xCO2', 'abrupt-2xCO2', 'abrupt-0p5xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'piClim-4xCO2', 'piClim-control']),
        (['CanESM5', 'GISS-E2-1-G', 'MIROC6'],
         ['piControl', 'abrupt-4xCO2', 'abrupt-2xCO2', 'abrupt-0p5xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'piClim-4xCO2', 'piClim-control', 'piClim-histall']),
        (['CESM2-FV2', 'CESM2-WACCM-FV2', 'IITM-ESM', 'NorESM1-F'],
         ['piControl']),
        (['CESM2-WACCM'],
         ['piControl', 'abrupt-4xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'piClim-control']),
        (['E3SM-1-0', 'SAM0-UNICON'],
         ['piControl', 'abrupt-4xCO2', 'historical']),
        (['EC-Earth3', 'HadGEM3-GC31-MM'],
         ['abrupt-4xCO2']),
        (['EC-Earth3-Veg', 'TaiESM1'],
         ['piControl', 'abrupt-4xCO2']),
        (['FGOALS-g3'],
         ['piControl', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585']),
        (['GFDL-CM4'],
         ['piControl', 'abrupt-4xCO2', 'historical', 'ssp245', 'ssp585', 'piClim-4xCO2', 'piClim-control', 'piClim-histall']),
        (['GISS-E2-1-H'],
         ['piControl', 'abrupt-4xCO2', 'abrupt-2xCO2', 'historical']),
        (['GISS-E2-2-G'],
         ['piControl', 'abrupt-2xCO2']),
        (['HadGEM3-GC31-LL'],
         ['piControl', 'abrupt-4xCO2', 'abrupt-0p5xCO2', 'historical', 'ssp126', 'ssp245', 'ssp585', 'piClim-4xCO2', 'piClim-control']),
        (['IPSL-CM6A-LR'],
         ['piControl', 'abrupt-2xCO2', 'abrupt-0p5xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'piClim-4xCO2', 'piClim-control', 'piClim-histall']),
        (['KACE-1-0-G'],
         ['abrupt-4xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585']),
        (['NESM3'],
         ['abrupt-4xCO2', 'historical']),
        (['NorESM2-LM'],
         ['piControl', 'abrupt-4xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'piClim-4xCO2', 'piClim-control', 'piClim-histall']),
    )
    # first match wins, same order as the original if/elif chain
    for models, experiments in experiments_by_models:
        if model in models:
            return experiments
    raise ValueError('no experiment list is defined for model ' + repr(model))
    

In [27]:
# Select the model to process; `model` is read by all cells below.
model = 'CNRM-CM6-1'
print(model)

# Calendar according to the hand-made table in calendar_check. Note that the
# processing loop further down re-reads the calendar from each dataset's time
# encoding and overwrites ds_calendar.
ds_calendar = calendar_check(model)
print(ds_calendar, 'calendar according to the function calendar_check')
# experiments to loop over for this model
explist = exp_list(model)
print(explist)

CNRM-CM6-1
gregorian calendar according to the function calendar_check
['piControl', 'abrupt-4xCO2', 'abrupt-2xCO2', 'abrupt-0p5xCO2', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 'piClim-4xCO2', 'piClim-control']


In [28]:

# For the models listed below the area weights are not computed from
# lat_bnds/lon_bnds but taken from the areacella field of the piControl run.
# The result, norm_areas, is used by the processing loop that follows.
if model in ['CNRM-CM6-1', 'CNRM-CM6-1-HR', 'CNRM-ESM2-1', 'IPSL-CM6A-LR']: # French models missing lat_bnds and lon_bnds coordinates
    # if computing area-weights from areacella instead:
    #area_cat= col.search(source_id = model, variable_id=['areacella'])
    area_cat = col.search(experiment_id = 'piControl', source_id = model, variable_id=['areacella'])
    area_dset_dict = area_cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})

    # keep the last dataset of the returned dictionary
    # (presumably there is only one group — TODO confirm)
    for key in area_dset_dict.keys():
        area_ds = area_dset_dict[key]
    # index 0 on the first and last axis, everything along the middle axis;
    # presumably the axes are (member, lat, lon), giving one cell area per
    # latitude — TODO confirm
    areas = area_ds['areacella'].values[0,:,0]
    # scale so that the weights average to 1
    norm_areas = areas/areas.mean()

# Main processing loop: for every experiment of `model`, load the monthly
# (table Amon) fields tas, rlut, rsut and rsdt from the catalogue, compute
# area-weighted global means and day-weighted annual means for every ensemble
# member, and write one csv file per member.
for exp in explist:
    search_kwargs = {
        'experiment_id': exp,
        'source_id': model,
        'variable_id': ['tas', 'rlut', 'rsut', 'rsdt'],
        'table_id': 'Amon',
    }
    if model == 'UKESM1-0-LL':
        # restrict to one institution (the alternative is 'NIMS-KMA').
        # This used to be a standalone `if`, so the unrestricted search of the
        # chain that followed silently replaced it.
        search_kwargs['institution_id'] = 'MOHC'
    elif model in ['MPI-ESM1-2-LR', 'IPSL-CM6A-LR', 'MRI-ESM2-0', 'EC-Earth3'] and exp in ['piControl']:
        # problem when loading more than one member simultaneously, so specify member_id
        search_kwargs['member_id'] = 'r2i1p1f1'
    # For 'MPI-ESM1-2-HR' no institution is selected; add
    # institution_id = 'DKRZ' or 'DWD' above to pick one of the two groups.
    cat = col.search(**search_kwargs)

    dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})
    if not dset_dict:
        # fail loudly rather than continue with the dataset of the previous experiment
        raise ValueError('no Amon data found in the catalogue for ' + model + ' ' + exp)

    # if several groups are returned, the last one is used
    ds_exp = list(dset_dict.values())[-1]
    members_sorted = ds_exp.member_id.sortby(ds_exp.member_id)

    if model == 'MCM-UA-1-0':
        ds_exp = ds_exp.rename({'longitude': 'lon','latitude': 'lat'})

    for member in members_sorted:
        print(member.values)

    print(ds_exp.time.encoding['calendar'])

    # loop through members
    #for member in [members_sorted.sel(member_id = 'r1i1p1f1')]:
    for member in members_sorted:
        print(exp, member.values)

        ds = ds_exp.sel(member_id = member)

        # calendar and units as stored in the dataset's time encoding; this
        # replaces the value guessed by calendar_check
        ds_calendar = ds.time.encoding['calendar']
        unit = ds.time.encoding['units']
        firstmonth = ds_exp.time.to_index().month[0]

        print(ds_calendar, 'calendar')
        print(unit)
        print('first month of dataset is:', firstmonth)

        # compute weights for average
        if model == 'NorCPM1' and exp == 'historical':
            # lat_bnds carries an extra leading axis here; use its first entry
            area_w = area_weights(ds.lat_bnds.values[0,:,:], ds.lon_bnds.values)
        elif model in ['CNRM-CM6-1', 'CNRM-CM6-1-HR', 'CNRM-ESM2-1', 'IPSL-CM6A-LR']: # French models missing lat_bnds and lon_bnds coordinates
            # weights precomputed from areacella before this loop
            area_w = norm_areas
        else:
            area_w = area_weights(ds.lat_bnds.values, ds.lon_bnds.values)

        day_weights = compute_day_weights(ds, calendar = ds_calendar, first_month = firstmonth)

        varlist = ['tas', 'rlut', 'rsut', 'rsdt']

        for variable in varlist:
            print(variable)
            data = ds[variable]

            # global average: lat is moved to the last axis so that the 1-d
            # weight array broadcasts along it
            area_avg = (data.transpose('time', 'lon', 'lat') * area_w).mean(dim=['lon', 'lat'])

            # annual average: the day weights average to 1 within each year,
            # so a plain mean of the weighted months is the day-weighted mean
            day_weighted_avg = area_avg*day_weights
            if firstmonth == 1:
                annualmean = day_weighted_avg.groupby('time.year').mean('time')
            else:
                # series does not start in January: average consecutive 12-month blocks
                n_years = len(day_weighted_avg) // 12
                annualmean_array = np.array([day_weighted_avg[i*12:(i+1)*12].mean() for i in range(n_years)])
                annualmean = xr.DataArray(annualmean_array)

            if variable == varlist[0]:
                # create dataframe for storing all results
                df = pd.DataFrame(annualmean.values, columns = [variable])
            else:
                df_col = pd.DataFrame(annualmean.values, columns = [variable])
                df = pd.merge(df, df_col, left_index=True, right_index=True, how='outer')

        filename = model + '_' + exp + '_' + str(member.values) + '_means.txt'
        file = os.path.join('../Processed_data/Global_annual_means/', model, exp, filename)
        # make sure the model/experiment directory exists before writing
        os.makedirs(os.path.dirname(file), exist_ok=True)

        df.to_csv(file)

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
r1i1p1f2
gregorian
piControl r1i1p1f2
gregorian calendar
hours since 1850-01-16 12:00:00.000000
first month of dataset is: 1
tas
rlut
rsut
rsdt
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
r1i1p1f2
r2i1p1f2
r3i1p1f2
r4i1p1f2
r5i1p1f2
r6i1p1f2
gregorian
abrupt-4xCO2 r1i1p1f2
gregorian calendar
days since 1850-01-01
first month of dataset is: 1
tas
rlut
rsut
rsdt
abrupt-4xCO2 r2i1p1f2
gregorian calendar
days since 1850-01-01
first month of dataset is: 1
tas
rlut
rsut
rsdt
abrupt-4xCO2 r3i1p

In [29]:
# display the annual-mean table of the last member processed by the loop above
df

Unnamed: 0,tas,rlut,rsut,rsdt
0,286.137598,238.299692,100.783751,340.445992
1,286.188018,238.475965,100.803157,340.445992
2,286.142137,238.22605,100.846087,340.477171
3,286.182011,238.352729,100.839983,340.445998
4,286.162603,238.450502,100.668853,340.445992
5,286.187039,238.619295,100.82003,340.445992
6,286.149362,238.262815,100.92625,340.477171
7,286.119548,238.202013,100.868418,340.445998
8,286.122439,238.116736,100.829697,340.445992
9,286.105587,238.233546,100.802455,340.445992
