# Find calendar

### For experiments for which a global annual average has already been computed

In [1]:
import os
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import xarray as xr
import intake

In [2]:
# Scan the processed-data tree and record what is available:
#   experiments[model][exp] -> sorted list of member labels found for that experiment

def _subfolder_names(path):
    """Return the names of the sub-directories of `path`, skipping Jupyter's checkpoint folder."""
    return [entry.name for entry in os.scandir(path)
            if entry.is_dir() and entry.name != '.ipynb_checkpoints']


experiments = {}

directory = '../Processed_data/Global_annual_means/'

model_names = _subfolder_names(directory)

for model in model_names:
    model_dir = os.path.join(directory, model)
    experiments[model] = {}
    for exp in _subfolder_names(model_dir):
        exp_dir = os.path.join(model_dir, exp)
        # every entry except the checkpoint folder is treated as a data file
        data_files = [entry.name for entry in os.scandir(exp_dir)
                      if entry.name != '.ipynb_checkpoints']
        # the member label is the third '_'-separated field of each file name
        experiments[model][exp] = sorted(name.split('_')[2] for name in data_files)


In [4]:
# Sort the model names in place; os.scandir yields directory entries in arbitrary order.
model_names.sort()

In [5]:
# Display the list of model names for which processed data folders exist.
model_names

['ACCESS-CM2',
 'ACCESS-ESM1-5',
 'AWI-CM-1-1-MR',
 'AWI-ESM-1-1-LR',
 'BCC-CSM2-MR',
 'BCC-ESM1',
 'CAMS-CSM1-0',
 'CAS-ESM2-0',
 'CESM2',
 'CESM2-FV2',
 'CESM2-WACCM',
 'CESM2-WACCM-FV2',
 'CIESM',
 'CMCC-CM2-SR5',
 'CNRM-CM6-1',
 'CNRM-CM6-1-HR',
 'CNRM-ESM2-1',
 'CanESM5',
 'CanESM5-CanOE',
 'E3SM-1-0',
 'E3SM-1-1',
 'E3SM-1-1-ECA',
 'EC-Earth3',
 'EC-Earth3-Veg',
 'EC-Earth3-Veg-LR',
 'FGOALS-f3-L',
 'FGOALS-g3',
 'FIO-ESM-2-0',
 'GFDL-CM4',
 'GFDL-ESM4',
 'GISS-E2-1-G',
 'GISS-E2-1-G-CC',
 'GISS-E2-1-H',
 'GISS-E2-2-G',
 'HadGEM3-GC31-LL',
 'HadGEM3-GC31-MM',
 'IITM-ESM',
 'INM-CM4-8',
 'INM-CM5-0',
 'IPSL-CM6A-LR',
 'KACE-1-0-G',
 'KIOST-ESM',
 'MIROC-ES2L',
 'MIROC6',
 'MPI-ESM-1-2-HAM',
 'MPI-ESM1-2-HR',
 'MPI-ESM1-2-LR',
 'MRI-ESM2-0',
 'NESM3',
 'NorCPM1',
 'NorESM1-F',
 'NorESM2-LM',
 'NorESM2-MM',
 'SAM0-UNICON',
 'TaiESM1',
 'UKESM1-0-LL']

In [6]:
# Subset of models to run the calendar lookup for.
# NOTE(review): the "Select model(s)" cell further down reassigns selected_models,
# so this list only takes effect if that reassignment is skipped or edited.
selected_models = ['AWI-ESM-1-1-LR',
 'CAS-ESM2-0',
 'CESM2-FV2',
 'CESM2-WACCM-FV2',
 'CIESM',
 'CMCC-CM2-SR5',
 'CanESM5-CanOE',
 'E3SM-1-1',
 'E3SM-1-1-ECA',
 'EC-Earth3-Veg',
 'EC-Earth3-Veg-LR',
 'FGOALS-f3-L',
 'FGOALS-g3',
 'FIO-ESM-2-0',
 'GISS-E2-1-G-CC',
 'GISS-E2-1-H',
 'GISS-E2-2-G',
 'HadGEM3-GC31-MM',
 'IITM-ESM',
 'KACE-1-0-G',
 'KIOST-ESM',
 'MPI-ESM-1-2-HAM',
 'NESM3',
 'NorCPM1',
 'NorESM1-F',
 'SAM0-UNICON',
 'TaiESM1']

## Select model(s)

In [7]:
# Models to run the calendar lookup for.
selected_models = ['MPI-ESM-1-2-HAM']
# Show the experiments and members available for the selected model.
# Index by the selection itself rather than by `model`: that name is only a
# leftover loop variable from another cell and may refer to a different model
# depending on which cells were run last.
experiments[selected_models[0]]

{'piControl': ['r1i1p1f1'],
 'abrupt-4xCO2': ['r1i1p1f1'],
 'historical': ['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1'],
 '1pctCO2': ['r1i1p1f1'],
 'ssp370': ['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']}

In [6]:
# Open the Pangeo CMIP6 catalog (a JSON description hosted on Google Cloud Storage)
# as an intake-esm datastore; `col` is the object the calendar loop searches.
col_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
col = intake.open_esm_datastore(col_url)

In [8]:
# For every selected model, look up the calendar of each (experiment, member)
# in the cloud catalog and write one table per model to
# '../Processed_data/Calendars/<model>_calendars.txt'.

column_names = ['model', 'exp', 'member', 'calendar']

#for model in model_names:
for model in selected_models:
    filename = '../Processed_data/Calendars/' + model + '_calendars.txt'
    # Collect plain rows and build the DataFrame once at the end:
    # DataFrame.append was removed in pandas 2.0 and row-by-row growth is quadratic.
    rows = []

    for exp in experiments[model]:
        for member in experiments[model][exp]:
            cat = col.search(experiment_id = exp, variable_id='tas', table_id='Amon', source_id = model, member_id = member)
            dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True}, cdf_kwargs={'chunks': {}})
            if not dset_dict:
                # Nothing matched in the catalog. Skip explicitly so that a dataset
                # left over from the previous iteration is never reused by mistake.
                print(model, exp, member, 'not found in catalog, skipped')
                continue
            # As before, when several datasets match, the last one is used.
            ds = list(dset_dict.values())[-1]

            calendar = ds.time.encoding['calendar']
            print(model, exp, member, calendar)
            rows.append([model, exp, member, calendar])

    df = pd.DataFrame(rows, columns = column_names)
    # Space-separated output; `sep` is passed by keyword because newer pandas
    # versions no longer accept it positionally.
    df.to_csv(filename, sep = ' ')
                



--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM piControl r1i1p1f1 proleptic_gregorian

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM abrupt-4xCO2 r1i1p1f1 proleptic_gregorian

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM historical r1i1p1f1 proleptic_gregorian

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM historical r2i1p1f1 proleptic_gregorian

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM historical r3i1p1f1 proleptic_gregorian

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM 1pctCO2 r1i1p1f1 proleptic_gregorian

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM ssp370 r1i1p1f1 proleptic_gregorian

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM ssp370 r2i1p1f1 proleptic_gregorian

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


MPI-ESM-1-2-HAM ssp370 r3i1p1f1 proleptic_gregorian


In [21]:
def calendar_check_new(model):
    """Return the distinct calendar names recorded for `model`.

    Reads the space-separated table
    '../Processed_data/Calendars/<model>_calendars.txt' (first column is the
    index) and returns the unique values of its 'calendar' column as an array,
    in order of first appearance.
    """
    folder = '../Processed_data/Calendars/'
    table_path = folder + model + '_calendars.txt'
    table = pd.read_table(table_path, index_col=0, sep = ' ')
    # NOTE: '365_day' is believed to be the same calendar as 'noleap';
    # the two names are not merged here.
    return table['calendar'].drop_duplicates().values

def calendar_check_old(model):
    """Return the calendar name for `model` from the hand-maintained lookup.

    The lookup holds the time formats found for piControl by manual check.
    For a few models the calendar is only a guess based on other models from
    the same institution; a notice is printed for those.

    Parameters
    ----------
    model : str
        Model name, e.g. 'CESM2'.

    Returns
    -------
    str or None
        The calendar name, or None when the model is not in the lookup
        (previously this case raised UnboundLocalError).
    """
    # Time formats for piControl, found from manual check:
    checked = {
        'noleap': ['TaiESM1', 'BCC-CSM2-MR', 'BCC-ESM1', 'CAMS-CSM1-0', 'CAS-ESM2-0', 'FGOALS-f3-L', 'FGOALS-g3',
                   'CanESM5', 'CanESM5-CanOE', 'E3SM-1-0', 'E3SM-1-1', 'E3SM-1-1-ECA', 'FIO-ESM-2-0', 'INM-CM4-8',
                   'INM-CM5-0', 'GISS-E2-1-G', 'GISS-E2-1-G-CC', 'GISS-E2-1-H', 'GISS-E2-2-G', 'CESM2', 'CESM2-FV2',
                   'CESM2-WACCM', 'CESM2-WACCM-FV2', 'NorCPM1', 'NorESM1-F', 'NorESM2-LM', 'NorESM2-MM', 'GFDL-CM4',
                   'SAM0-UNICON', 'GFDL-ESM4', 'CIESM', 'MCM-UA-1-0'],
        # 'IPSL-CM6A-LR':'piClim-4xCO2','piClim-control' says noleap calendar
        'gregorian': ['EC-Earth3', 'CNRM-CM6-1', 'IPSL-CM6A-LR', 'MIROC-ES2L', 'MIROC6', 'NESM3'],
        'proleptic_gregorian': ['AWI-CM-1-1-MR', 'EC-Earth3-Veg', 'EC-Earth3-Veg-LR', 'ACCESS-ESM1-5', 'ACCESS-CM2',
                                'MPI-ESM-1-2-HAM', 'MPI-ESM1-2-LR', 'MPI-ESM1-2-HR', 'EC-Earth3-LR'],
        '360_day': ['UKESM1-0-LL', 'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'CNRM-ESM2-1', 'KACE-1-0-G', 'MRI-ESM2-0'],
        'julian': ['IITM-ESM'],
    }
    # Models whose calendar was not checked: model -> (guessed calendar, notice to print).
    # 'EC-Earth3' and 'EC-Earth3-LR' used to be listed here as well, but they are
    # already covered by the checked lists above, so that entry was unreachable.
    guessed = {
        'AWI-ESM-1-1-LR': ('proleptic_gregorian', 'calendar is likely proleptic gregorian'),
        'CNRM-CM6-1-HR': ('gregorian', 'calendar is likely gregorian'),
    }

    for ds_calendar, models in checked.items():
        if model in models:
            if model in ['CNRM-ESM2-1', 'MRI-ESM2-0']:
                print('piControl is 360_day, the other experiments unknown')
            return ds_calendar

    if model in guessed:
        print('not 100% sure what calendar this model has, but a guess is made based on other models from same institution')
        ds_calendar, notice = guessed[model]
        print(notice)
        return ds_calendar

    # Unknown model: report "no information" instead of crashing.
    return None

In [22]:
# Compare, model by model, the calendars stored in the per-model calendar files
# with the answer from the older hand-written lookup.
for model in model_names:
    recorded_calendars = calendar_check_new(model)
    legacy_calendar = calendar_check_old(model)
    print(model, recorded_calendars, 'with old function:', legacy_calendar)
    # Models whose data may need a closer look:
    #   CNRM-ESM2-1  ['gregorian' 'proleptic_gregorian' '360_day'] with old function: 360_day
    #   EC-Earth3    ['proleptic_gregorian' 'gregorian'] with old function: gregorian
    #   IPSL-CM6A-LR ['noleap' 'gregorian' 'proleptic_gregorian'] with old function: gregorian
    #   MIROC6       ['gregorian' 'proleptic_gregorian'] with old function: gregorian
    #   MRI-ESM2-0   ['proleptic_gregorian' '360_day'] with old function: 360_day.
    #                I changed all data to use 'proleptic_gregorian' in averaging
    #   NESM3        ['gregorian' 'standard'] with old function: gregorian. Check what 'standard' means

ACCESS-CM2 ['proleptic_gregorian'] with old function: proleptic_gregorian
ACCESS-ESM1-5 ['proleptic_gregorian'] with old function: proleptic_gregorian
AWI-CM-1-1-MR ['proleptic_gregorian'] with old function: proleptic_gregorian
not 100% sure what calendar this model has, but a guess is made based on other models from same institution
calendar is likely proleptic gregorian
AWI-ESM-1-1-LR ['proleptic_gregorian'] with old function: proleptic_gregorian
BCC-CSM2-MR ['365_day'] with old function: noleap
BCC-ESM1 ['noleap' '365_day'] with old function: noleap
CAMS-CSM1-0 ['noleap' '365_day'] with old function: noleap
CESM2 ['noleap'] with old function: noleap
CESM2-FV2 ['noleap'] with old function: noleap
CESM2-WACCM ['noleap' '365_day'] with old function: noleap
CESM2-WACCM-FV2 ['noleap'] with old function: noleap
CNRM-CM6-1 ['gregorian'] with old function: gregorian
not 100% sure what calendar this model has, but a guess is made based on other models from same institution
calendar is likely