# Global averaging

In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
from scipy.signal import detrend
from matplotlib import pyplot as plt
from scipy import signal
import pandas as pd
import xarray as xr
import intake
import pprint 
import util 

col_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
col = intake.open_esm_datastore(col_url)

file = 'available_data_cloud_june12th_2020.txt'

In [2]:
# load table:
data_table = pd.read_table(file,index_col=0)
models_used = data_table['piControl (yrs)'].index
print(models_used)

Index(['IPSL-CM6A-LR', 'MRI-ESM2-0', 'MPI-ESM1-2-LR', 'TaiESM1',
       'AWI-CM-1-1-MR', 'AWI-ESM-1-1-LR', 'BCC-CSM2-MR', 'BCC-ESM1',
       'CAMS-CSM1-0', 'FGOALS-f3-L', 'FGOALS-g3', 'IITM-ESM', 'CanESM5',
       'CanESM5-CanOE', 'CNRM-CM6-1', 'CNRM-CM6-1-HR', 'CNRM-ESM2-1',
       'ACCESS-ESM1-5', 'ACCESS-CM2', 'E3SM-1-0', 'E3SM-1-1', 'E3SM-1-1-ECA',
       'EC-Earth3', 'EC-Earth3-LR', 'EC-Earth3-Veg', 'EC-Earth3-Veg-LR',
       'FIO-ESM-2-0', 'MPI-ESM-1-2-HAM', 'INM-CM4-8', 'INM-CM5-0',
       'MIROC-ES2L', 'MIROC6', 'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM',
       'UKESM1-0-LL', 'MPI-ESM1-2-HR', 'GISS-E2-1-G', 'GISS-E2-1-G-CC',
       'GISS-E2-1-H', 'GISS-E2-2-G', 'CESM2', 'CESM2-FV2', 'CESM2-WACCM',
       'CESM2-WACCM-FV2', 'NorCPM1', 'NorESM1-F', 'NorESM2-LM', 'NorESM2-MM',
       'KACE-1-0-G', 'GFDL-CM4', 'GFDL-ESM4', 'NESM3', 'SAM0-UNICON', 'CIESM',
       'MCM-UA-1-0'],
      dtype='object')


# Choose model and variable

In [108]:
model = models_used[5]; variable = 'tas'
print(model)

# what experiments does this model have that we want to study?
if data_table.loc[model][0] == 'Problem when loading more than 1':    # then we need to load the piControl members manually
    piControl_members = ['r1i1p1f1', 'r2i1p1f1']
    exp_list = np.append(['piControl'], [exp[:-11] for exp in data_table.loc[model][1:7].index if float(data_table.loc[model][1:7][exp]) > 0])
else: 
    exp_list = [exp[:-11] for exp in data_table.loc[model][:7].index if float(data_table.loc[model][:7][exp]) > 0]
print(exp_list)
data_table.loc[model]



AWI-ESM-1-1-LR
['piControl', 'historical']


piControl (ens.mem.)         1
historical (ens.mem.)        1
ssp126 (ens.mem.)          NaN
ssp245 (ens.mem.)          NaN
ssp370 (ens.mem.)          NaN
ssp585 (ens.mem.)          NaN
abrupt-4xCO2 (ens.mem.)    NaN
piControl (yrs)            100
historical (yrs)           165
ssp126 (yrs)               NaN
ssp245 (yrs)               NaN
ssp370 (yrs)               NaN
ssp585 (yrs)               NaN
abrupt-4xCO2 (yrs)         NaN
Name: AWI-ESM-1-1-LR, dtype: object

In [86]:
exp_keys = {}; datasets = {}

for exp in exp_list[1:]:
    print(exp)
    
    if exp == 'piControl' and data_table.loc[model][0] == 'Problem when loading more than 1':
        print('Need to write some code that loads members separately')
    else:
        #cat = col.search(experiment_id = exp, source_id = model, variable_id=variable, table_id='Amon') 
        cat = col.search(experiment_id = exp, source_id = model, variable_id=variable, table_id='Amon') 
        dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})
        for key in dset_dict.keys():
            exp_keys[exp] = key
            datasets[key] = dset_dict[key]
exp_keys

historical
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
ssp126
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
ssp245
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
ssp370
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
ssp585
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 1 group(s)
abrupt-4xCO2
--> The keys in the returned

{'historical': 'CMIP.MRI.MRI-ESM2-0.historical.Amon.gn',
 'ssp126': 'ScenarioMIP.MRI.MRI-ESM2-0.ssp126.Amon.gn',
 'ssp245': 'ScenarioMIP.MRI.MRI-ESM2-0.ssp245.Amon.gn',
 'ssp370': 'ScenarioMIP.MRI.MRI-ESM2-0.ssp370.Amon.gn',
 'ssp585': 'ScenarioMIP.MRI.MRI-ESM2-0.ssp585.Amon.gn',
 'abrupt-4xCO2': 'CMIP.MRI.MRI-ESM2-0.abrupt-4xCO2.Amon.gn'}

In [106]:
##### load just piControl #####
#exp_keys = {}; datasets = {}
#exp = 'piControl'

#cat = col.search(experiment_id = exp, source_id = model, variable_id=variable, table_id='Amon') 
#if model in ['IPSL-CM6A-LR', 'MRI-ESM2-0', 'MPI-ESM1-2-LR']:
#    cat = col.search(experiment_id = exp, source_id = model, variable_id=variable, table_id='Amon', member_id = 'r1i1p1f1') 
#dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})
#for key in dset_dict.keys():
#    exp_keys[exp] = key
#    datasets[key] = dset_dict[key]
#exp_keys

# load a dataset for manual calendar check:
exp = exp_list[0]; print(exp)
key = exp_keys[exp]
exp_datasets = datasets[key]
members_sorted = exp_datasets.member_id.sortby(exp_datasets.member_id)

ds = exp_datasets.sel(member_id = members_sorted[0])
print(model, ds.time)
print('\n')

# Time formats for piControl:
if model in ['TaiESM1', 'BCC-CSM2-MR', 'BCC-ESM1', 'CAMS-CSM1-0', 'FGOALS-f3-L', 'FGOALS-g3', 'CanESM5', 'CanESM5-CanOE', 'E3SM-1-0', 'E3SM-1-1', 'E3SM-1-1-ECA', 'FIO-ESM-2-0', 'INM-CM4-8', 'INM-CM5-0', 'GISS-E2-1-G', 'GISS-E2-1-G-CC', 'GISS-E2-1-H', 'GISS-E2-2-G', 'CESM2', 'CESM2-FV2', 'CESM2-WACCM', 'CESM2-WACCM-FV2', 'NorCPM1', 'NorESM1-F', 'NorESM2-LM', 'NorESM2-MM', 'GFDL-CM4', 'SAM0-UNICON', 'GFDL-ESM4', 'CIESM', 'MCM-UA-1-0']:
    ds_calendar = 'noleap'
elif model in ['CNRM-CM6-1', 'IPSL-CM6A-LR', 'MIROC-ES2L', 'MIROC6', 'NESM3']:
    ds_calendar = 'gregorian'
elif model in ['AWI-CM-1-1-MR', 'EC-Earth3-Veg', 'EC-Earth3-Veg-LR', 'ACCESS-ESM1-5', 'ACCESS-CM2', 'MPI-ESM-1-2-HAM', 'MPI-ESM1-2-LR', 'MPI-ESM1-2-HR']:
    ds_calendar = 'proleptic_gregorian'
elif model in ['UKESM1-0-LL', 'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'CNRM-ESM2-1', 'KACE-1-0-G', 'MRI-ESM2-0']:
    ds_calendar = '360_day'
    if model in ['CNRM-ESM2-1', 'MRI-ESM2-0']:
        print('piControl is 360_day, the other experiments unknown')
elif model in ['IITM-ESM']:
    ds_calendar = 'Julian'
elif model in ['AWI-ESM-1-1-LR', 'CNRM-CM6-1-HR', 'EC-Earth3', 'EC-Earth3-LR']:
    ds_calendar = 'datetime64'
    print('not sure what calendar this model has')
    if model in ['EC-Earth3', 'EC-Earth3-LR', 'AWI-ESM-1-1-LR']:
        print('calendar is likely proleptic gregorian')
    elif model in ['CNRM-CM6-1-HR']:
        print('calendar is likely gregorian')
    
print(ds_calendar, 'calendar')

piControl
AWI-ESM-1-1-LR <xarray.DataArray 'time' (time: 1200)>
array(['1953-01-16T12:00:00.000000000', '1953-02-15T00:00:00.000000000',
       '1953-03-16T12:00:00.000000000', ..., '1939-10-16T12:00:00.000000000',
       '1939-11-16T00:00:00.000000000', '1939-12-16T12:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time       (time) datetime64[ns] 1953-01-16T12:00:00 ... 1939-12-16T12:00:00
    member_id  <U8 'r1i1p1f1'
Attributes:
    axis:           T
    bounds:         time_bnds
    long_name:      time
    standard_name:  time


not sure what calendar this model has
calendar is likely proleptic gregorian
datetime64 calendar


In [23]:
def area_weights(lat_bnds, lon_bnds): 
    # computes exact area weigths assuming earth is a perfect sphere
    lowerlats = np.radians(lat_bnds[:,0]); upperlats = np.radians(lat_bnds[:,1])
    difflon = np.radians(np.diff(lon_bnds[0,:])) # if the differences in longitudes are all the same
    areaweights = difflon*(np.sin(upperlats) - np.sin(lowerlats));
    areaweights /= areaweights.mean()
    return areaweights # list of weights, of same dimension as latitude

# function copied from: http://xarray.pydata.org/en/stable/examples/monthly-means.html
def leap_year(year, calendar='standard'):
    """Determine if year is a leap year"""
    leap = False
    if ((calendar in ['standard', 'gregorian',
        'proleptic_gregorian', 'julian']) and
        (year % 4 == 0)):
        leap = True
        if ((calendar == 'proleptic_gregorian') and
            (year % 100 == 0) and
            (year % 400 != 0)):
            leap = False
        elif ((calendar in ['standard', 'gregorian']) and
                 (year % 100 == 0) and (year % 400 != 0) and
                 (year < 1583)):
            leap = False
    return leap

# function copied from: http://xarray.pydata.org/en/stable/examples/monthly-means.html
def get_dpm(time, calendar='standard'):
    """
    return a array of days per month corresponding to the months provided in `months`
    """
    month_length = np.zeros(len(time), dtype=np.int)

    cal_days = dpm[calendar]

    for i, (month, year) in enumerate(zip(time.month, time.year)):
        month_length[i] = cal_days[month]
        if leap_year(year, calendar=calendar) and month == 2:
            month_length[i] += 1
    return month_length

# days per month:
dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       'julian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], ##### I think this should be correct
       'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
       '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]
      }

def compute_day_weights(ds, calendar = 'noleap'): # new function
    month_length = xr.DataArray(get_dpm((ds.time.to_index()), calendar=ds_calendar), coords=[ds.time], name='month_length')
    
    ##### This code is only tested for noleap so far #####
    norm_by_annual = month_length.groupby('time.year').mean('time') # make annual mean
    norm_by_monthly = np.concatenate([np.tile(norm_by_annual.values[i], 12) for i in range(len(norm_by_annual.values))])
    weights = month_length/norm_by_monthly
    # normalized to have mean 1
    return weights 

In [24]:
area_w = area_weights(ds.lat_bnds.values, ds.lon_bnds.values)

for exp in exp_list:
    key = exp_keys[exp]
    exp_datasets = datasets[key]
    members_sorted = exp_datasets.member_id.sortby(exp_datasets.member_id)
    
    for member in members_sorted.values:
        print(exp, member)
        ds = exp_datasets.sel(member_id = member)
        data = ds[variable]
        
        if model == 'MCM-UA-1-0':
             ds = ds.rename({'longitude': 'lon','latitude': 'lat'}) 
        
        # global average
        area_avg = (data.transpose('time', 'lon', 'lat') * area_w).mean(dim=['lon', 'lat'])
        day_weights = compute_day_weights(ds)
        
        #yrs = int(area_avg.shape[0]/12)
        
        # annual average
        day_weighted_avg = area_avg*day_weights
        annualmean = day_weighted_avg.groupby('time.year').mean('time')
        
        colname = [(exp, member)]
        
        #first_member_piControl = 'r1i1p1f1'
        #if model in ['CNRM-CM6-1', 'CNRM-ESM2-1', 'UKESM1-0-LL', 'MIROC-ES2L']:
        #    first_member_piControl = 'r1i1p1f2'
        #elif model in ['GISS-E2-1-G']:
        #    first_member_piControl = 'r101i1p1f1'
        
        #if exp == 'piControl' and member == first_member_piControl:
        if exp == 'piControl' and member == 'r1i1p1f1':
            # create dataframe for storing all results and make the piControl years the index
            df = pd.DataFrame(annualmean.values, columns = colname)
        else:
            df_col = pd.DataFrame(annualmean.values, columns = colname)
            df = pd.merge(df, df_col, left_index=True, right_index=True, how='outer')
        
df.columns = pd.MultiIndex.from_tuples(df.columns, names=['Experiment','Member'])




piControl r1i1p1f1
historical r1i1p1f1
historical r2i1p1f1
historical r3i1p1f1
historical r4i1p1f1
historical r5i1p1f1
ssp126 r1i1p1f1
ssp245 r1i1p1f1
ssp370 r1i1p1f1
ssp370 r2i1p1f1
ssp370 r3i1p1f1
ssp370 r4i1p1f1
ssp370 r5i1p1f1
ssp585 r1i1p1f1
abrupt-4xCO2 r1i1p1f1


In [25]:
df

Experiment,piControl,historical,historical,historical,historical,historical,ssp126,ssp245,ssp370,ssp370,ssp370,ssp370,ssp370,ssp585,abrupt-4xCO2
Member,r1i1p1f1,r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r1i1p1f1,r1i1p1f1,r1i1p1f1,r2i1p1f1,r3i1p1f1,r4i1p1f1,r5i1p1f1,r1i1p1f1,r1i1p1f1
0,287.087304,286.832076,286.894184,286.581452,287.107015,286.904730,288.149132,287.307614,288.069650,288.164790,288.066750,288.266559,288.197767,288.099701,288.127903
1,286.983361,286.835224,286.788679,286.676961,286.958355,286.929121,288.955535,289.020832,288.335865,288.162434,288.226010,288.238659,288.317330,288.214821,288.990072
2,286.859565,286.900352,286.900137,286.757013,286.881314,286.814834,288.180500,288.270828,288.131156,288.298607,288.453577,288.175065,288.286112,288.525420,289.281347
3,286.988685,287.079712,286.925065,286.733798,286.744366,286.907293,288.287847,288.397648,288.267146,288.295661,288.637314,288.221088,288.475879,288.440470,289.692480
4,286.962557,287.083481,286.785969,286.918747,286.797808,286.770911,287.546930,287.588297,288.280307,288.145384,288.599207,288.395516,288.553787,288.211626,290.099805
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,287.072613,,,,,,,,,,,,,,
496,286.874089,,,,,,,,,,,,,,
497,286.846525,,,,,,,,,,,,,,
498,286.931116,,,,,,,,,,,,,,


In [26]:
df.to_csv('../Processed_data/Global_annual_means/' + model + '_' + variable + '_means.txt')