Preprocessing step.
Extract the 1950-1970 daily maximum and mean temperature and daily minimum and mean relative humidity for each CMIP6 model, and upload them to a Google Cloud Storage bucket.
These fields are used to index the lookup table for number of hours per day.

In [None]:
import xarray as xr
import numpy as np
import subprocess
import os.path as path
from datetime import datetime

In [None]:
# Directory configuration. Every value below is a placeholder that must be
# replaced before running the notebook.
#
# tmp_dir      : local scratch directory; the processing cells delete its
#                contents after each model.
# gs_dir_<var> : bucket location the input files for <var> are copied from.
# my_dir_<var> : bucket location the subset file for <var> is copied to.
#
# The placeholders are plain strings (no f-prefix, since nothing is
# interpolated) and all of them are closed with '>'.
tmp_dir = '<REPLACE WITH TEMPORARY DATA DIRECTORY>'

# Daily maximum temperature (tasmax)
gs_dir_tmax = '<REPLACE WITH LOCATION OF CMIP6 TEMPERATURE DATA ON BUCKET>'
my_dir_tmax = '<REPLACE WITH OUTPUT LOCATION FOR TEMPERATURE DATA ON GOOGLE BUCKET>'

# Daily mean temperature (tas)
gs_dir_tmean = '<REPLACE WITH LOCATION OF CMIP6 TEMPERATURE DATA ON BUCKET>'
my_dir_tmean = '<REPLACE WITH OUTPUT LOCATION FOR TEMPERATURE DATA ON GOOGLE BUCKET>'

# Daily minimum relative humidity (hursmin)
gs_dir_rhmin = '<REPLACE WITH LOCATION OF CMIP6 RH DATA ON BUCKET>'
my_dir_rhmin = '<REPLACE WITH OUTPUT LOCATION FOR RH DATA ON GOOGLE BUCKET>'

# Daily mean relative humidity (hurs)
gs_dir_rhmean = '<REPLACE WITH LOCATION OF CMIP6 RH DATA ON BUCKET>'
my_dir_rhmean = '<REPLACE WITH OUTPUT LOCATION FOR RH DATA ON GOOGLE BUCKET>'

In [None]:
# Model names; each one is substituted into the input and output file names.
model_list = [
    'ACCESS-CM2',
    'ACCESS-ESM1-5',
    'AWI-CM-1-1-MR',
    'BCC-CSM2-MR',
    'CAMS-CSM1-0',
    'CMCC-ESM2',
    'CNRM-CM6-1-HR',
    'CNRM-CM6-1',
    'CNRM-ESM2-1',
    'CanESM5',
    'EC-Earth3-Veg-LR',
    'EC-Earth3-Veg',
    'EC-Earth3',
    'FGOALS-g3',
    'GFDL-CM4',
    'GFDL-ESM4',
    'HadGEM3-GC31-LL',
    'HadGEM3-GC31-MM',
    'INM-CM4-8',
    'INM-CM5-0',
    'IPSL-CM6A-LR',
    'KIOST-ESM',
    'MIROC-ES2L',
    'MIROC6',
    'MPI-ESM1-2-HR',
    'MPI-ESM1-2-LR',
    'MRI-ESM2-0',
    'NESM3',
    'NorESM2-LM',
    'NorESM2-MM',
    'UKESM1-0-LL',
]

# Variable short names, matching the file-name prefixes used by the
# processing cells.
var_names = [
    'tasmax',
    'tas',
    'hursmin',
    'hurs',
]

n_models = len(model_list)
n_vars = len(var_names)

# period_in selects the input files by name; period_out labels the output file.
period_in = 'historical'
period_out = '1950_1970'

# Bounds of the time slice applied to every dataset.
date0 = datetime(year=1950, month=1, day=1)
date1 = datetime(year=1970, month=1, day=1)

In [None]:
# Temperature Max
# For each model: copy the input tasmax files from the source bucket into
# tmp_dir, keep only the date0..date1 time range, write it to a single NetCDF
# file, copy that file to the output bucket, then empty tmp_dir.

# Index of the first model to process. Leave at 0 for a full run; raise it to
# resume an interrupted run without redoing the earlier models.
start_index = 0

for ii, model in enumerate(model_list):

    if ii < start_index:
        continue

    print(ii, end='\r')

    # Get files. check=True makes a failed copy raise CalledProcessError
    # instead of being silently ignored (output is still suppressed).
    fname_glob = f'tasmax_day_{model}_{period_in}*'
    fp = path.join(gs_dir_tmax, fname_glob)
    subprocess.run(f'gsutil -m cp {fp} {tmp_dir}', shell=True, check=True,
                   stdout=subprocess.DEVNULL,
                   stderr=subprocess.DEVNULL)

    # Open only this model's downloaded files, so anything else left in
    # tmp_dir cannot be merged in by accident. The context manager closes the
    # files before they are deleted below.
    fp_out = path.join(tmp_dir, f'tasmax_day_{model}_{period_out}.nc')
    with xr.open_mfdataset(path.join(tmp_dir, fname_glob)) as ds:
        # Subset in time and write to file
        ds.sel(time=slice(date0, date1)).to_netcdf(fp_out)

    # Move to my google bucket. The upload must succeed before the local copy
    # is removed, hence check=True.
    subprocess.run(f'gsutil -m cp {fp_out} {my_dir_tmax}', shell=True, check=True,
                   stdout=subprocess.DEVNULL,
                   stderr=subprocess.DEVNULL)

    # Delete everything in tmp directory (best effort)
    subprocess.run(f'rm -f {path.join(tmp_dir, "*")}', shell=True,
                   stdout=subprocess.DEVNULL,
                   stderr=subprocess.DEVNULL)

In [None]:
# Temperature Mean
# For each model: copy the input tas files from gs_dir_tmean into tmp_dir,
# keep only the date0..date1 time range, write it to a single NetCDF file,
# copy that file to my_dir_tmean, then empty tmp_dir.
# (The previously hard-coded gs_dir / my_dir assignments were unused; the
# configured gs_dir_tmean / my_dir_tmean are the only locations read here.)

for ii, model in enumerate(model_list):

    print(ii)

    # Get files. check=True makes a failed copy raise CalledProcessError
    # instead of being silently ignored.
    fname_glob = f'tas_day_{model}_{period_in}*'
    fp = path.join(gs_dir_tmean, fname_glob)
    subprocess.run(f'gsutil -m cp {fp} {tmp_dir}', shell=True, check=True)

    # Open only this model's downloaded files, so anything else left in
    # tmp_dir cannot be merged in by accident. The context manager closes the
    # files before they are deleted below.
    fp_out = path.join(tmp_dir, f'tas_day_{model}_{period_out}.nc')
    with xr.open_mfdataset(path.join(tmp_dir, fname_glob), chunks='auto') as ds:
        # Subset in time and write to file
        ds.sel(time=slice(date0, date1)).to_netcdf(fp_out)

    # Move to my google bucket. The upload must succeed before the local copy
    # is removed, hence check=True.
    subprocess.run(f'gsutil -m cp {fp_out} {my_dir_tmean}', shell=True, check=True)

    # Delete everything in tmp directory (best effort)
    subprocess.run(f'rm -f {path.join(tmp_dir, "*")}', shell=True)

In [None]:
# RH Min
# For each model: copy the input hursmin files from gs_dir_rhmin into tmp_dir,
# keep only the date0..date1 time range, write it to a single NetCDF file,
# copy that file to my_dir_rhmin, then empty tmp_dir.
# (The previously hard-coded gs_dir / my_dir assignments were unused; the
# configured gs_dir_rhmin / my_dir_rhmin are the only locations read here.)

for ii, model in enumerate(model_list):

    print(ii)

    # Get files. check=True makes a failed copy raise CalledProcessError
    # instead of being silently ignored.
    fname_glob = f'hursmin_day_{model}_{period_in}*'
    fp = path.join(gs_dir_rhmin, fname_glob)
    subprocess.run(f'gsutil -m cp {fp} {tmp_dir}', shell=True, check=True)

    # Open only this model's downloaded files, so anything else left in
    # tmp_dir cannot be merged in by accident. The context manager closes the
    # files before they are deleted below.
    fp_out = path.join(tmp_dir, f'hursmin_day_{model}_{period_out}.nc')
    with xr.open_mfdataset(path.join(tmp_dir, fname_glob), chunks='auto') as ds:
        # Subset in time and write to file
        ds.sel(time=slice(date0, date1)).to_netcdf(fp_out)

    # Move to my google bucket. The upload must succeed before the local copy
    # is removed, hence check=True.
    subprocess.run(f'gsutil -m cp {fp_out} {my_dir_rhmin}', shell=True, check=True)

    # Delete everything in tmp directory (best effort)
    subprocess.run(f'rm -f {path.join(tmp_dir, "*")}', shell=True)

In [None]:
# RH Mean
# For each model: copy the input hurs files from gs_dir_rhmean into tmp_dir,
# keep only the date0..date1 time range, write it to a single NetCDF file,
# copy that file to my_dir_rhmean, then empty tmp_dir.
# (The previously hard-coded gs_dir / my_dir assignments were unused; the
# configured gs_dir_rhmean / my_dir_rhmean are the only locations read here.)

for ii, model in enumerate(model_list):

    print(ii)

    # Get files. check=True makes a failed copy raise CalledProcessError
    # instead of being silently ignored.
    fname_glob = f'hurs_day_{model}_{period_in}*'
    fp = path.join(gs_dir_rhmean, fname_glob)
    subprocess.run(f'gsutil -m cp {fp} {tmp_dir}', shell=True, check=True)

    # Open only this model's downloaded files, so anything else left in
    # tmp_dir cannot be merged in by accident. The context manager closes the
    # files before they are deleted below.
    fp_out = path.join(tmp_dir, f'hurs_day_{model}_{period_out}.nc')
    with xr.open_mfdataset(path.join(tmp_dir, fname_glob), chunks='auto') as ds:
        # Subset in time and write to file
        ds.sel(time=slice(date0, date1)).to_netcdf(fp_out)

    # Move to my google bucket. The upload must succeed before the local copy
    # is removed, hence check=True.
    subprocess.run(f'gsutil -m cp {fp_out} {my_dir_rhmean}', shell=True, check=True)

    # Delete everything in tmp directory (best effort)
    subprocess.run(f'rm -f {path.join(tmp_dir, "*")}', shell=True)