In [1]:
# imports
import iris
import iris.coord_categorisation as iccat
from iris.experimental import equalise_cubes

import glob
import re
import os

In [2]:
# files to process
# glob pattern for the input files; it is expanded with glob.glob in the load cell
file_path = '_cp-rcm/ICTP-RegCM/ALP-3/HadGEM/historical/r1i1p1/ICTP-RegCM4-7-0/v0/1hr/pr/pr*.nc'
# averaging frequency: 'day' and 'mon' are the only values the later cells accept
freq = 'day'

In [3]:

# function to fix any rounding issues with coord systems
# needed for _HCLIMcom/ICHEC-EC-EARTH/rcp85
def fix_coord_metadata(cube, ndigits=9):
    """Round the false easting/northing of a cube's projection coord systems in place.

    Looks up the 'projection_x_coordinate' and 'projection_y_coordinate' coords
    of ``cube`` and rounds the ``false_easting`` and ``false_northing`` of each
    coord's coord system, so that values differing only by rounding noise
    become equal.

    Args:
        cube: object exposing ``coord(name)``; each returned coord must have a
            ``coord_system`` attribute. Modified in place.
        ndigits: number of decimal places to round to (default 9).

    Returns:
        None.
    """
    for coord_name in ('projection_x_coordinate', 'projection_y_coordinate'):
        coord_system = cube.coord(coord_name).coord_system
        if coord_system is None:
            # a coord without a coord system has no offsets to tidy up
            continue
        for offset_name in ('false_easting', 'false_northing'):
            offset = getattr(coord_system, offset_name, None)
            # leave unset offsets alone rather than failing inside round()
            if offset is not None:
                setattr(coord_system, offset_name, round(offset, ndigits))

In [4]:
# load data
# in operations loop over each file individually

# glob the file_path and process each file individually
# glob returns matches in arbitrary order, so sort to make files[0] reproducible
files = sorted(glob.glob(file_path))
if not files:
    # fail with a message naming the pattern instead of a bare IndexError below
    raise FileNotFoundError(f'No files match: {file_path}')

# only load the variables this notebook knows how to name ('pr' and 'tas')
cubes = iris.load(files[0], ['pr', 'tas'])

  semi_minor_axis = semi_major_axis - ((1.0 / inverse_flattening) *


In [5]:
# lambert coords can carry rounding noise in their metadata; if the first cube
# has a projection x coord, tidy the coord system of every loaded cube
has_projection_coords = len(cubes[0].coords("projection_x_coordinate")) > 0
if has_projection_coords:
    for loaded_cube in cubes:
        fix_coord_metadata(loaded_cube)

# make attributes agree across cubes, then join them into a single cube
equalise_cubes.equalise_attributes(cubes)
cube = cubes.concatenate_cube()

# if the last two time points fall in different months, shift every time
# point back by half an hour
time_coord = cube.coord('time')
if time_coord.cell(-2).point.month != time_coord.cell(-1).point.month:
    # express thirty minutes in whatever unit the time coord uses
    unit = time_coord.units
    for unit_prefix, half_hour in (('hour', 0.5), ('second', 60 * 30), ('day', 0.5 / 24)):
        if unit.name.startswith(unit_prefix):
            thirty_mins = half_hour
            break
    else:
        raise ValueError(f"Don't know how to deal with: '{unit.name}'")

    print('Subtracting half an hour from time coord first')
    time_coord.points = time_coord.points - thirty_mins

In [6]:

# for each supported frequency: the categorised coord to add alongside 'year'
# and the iris helper that adds it
categorisers = {
    'mon': ('month_number', iccat.add_month_number),
    'day': ('day_of_year', iccat.add_day_of_year),
}

iccat.add_year(cube, 'time')
if freq not in categorisers:
    raise ValueError('Unrecognised frequency')

remove_later, add_category = categorisers[freq]
add_category(cube, 'time')
# aggregate over each distinct (category, year) pair
agg_by = [remove_later, 'year']


In [7]:
# compute averages
# take the mean over the groups defined by the coords named in agg_by
print(f'Computing {freq} average')
means = cube.aggregated_by(agg_by, iris.analysis.MEAN)

Computing day average


In [8]:
# the categorised coords were only needed for the aggregation, so drop them
for aux_name in (remove_later, 'year'):
    means.remove_coord(aux_name)

In [11]:

# make sure standard names are being used
standard_names = {'pr': 'precipitation_flux', 'tas': 'air_temperature'}
if means.var_name in standard_names:
    means.standard_name = standard_names[means.var_name]


def _date_stamp(point, with_day):
    """Format a datetime-like point as YYYYMM, or YYYYMMDD when with_day is true."""
    stamp = f'{point.year}{point.month:02}'
    if with_day:
        stamp += f'{point.day:02}'
    return stamp


# save
# construct new filename
# the stamps come from the first and last time points of the averaged cube;
# monthly output is stamped to the month, anything else to the day
time_coord = means.coord('time')
file_start = _date_stamp(time_coord.cell(0).point, freq != 'mon')
file_end = _date_stamp(time_coord.cell(-1).point, freq != 'mon')

# do appropriate replacements
file_template = files[0]
file_template = re.sub(r'\/1hr\/', f'/{freq}/', file_template)
file_template = re.sub(r'_1hr_', f'_{freq}_', file_template)

# date-range layouts accepted in the input filename, longest first so a
# 12-digit range is not half-matched by the 10-digit pattern; the dot is
# escaped so only a literal '.nc' suffix matches
date_range_patterns = (
    r'\d{12}-\d{12}\.nc',
    r'\d{10}-\d{10}\.nc',
    r'\d{4}_\d{4}\.nc',
)
for date_range_pattern in date_range_patterns:
    if re.search(date_range_pattern, file_template):
        file_template = re.sub(date_range_pattern, f'{file_start}-{file_end}.nc', file_template)
        break
else:
    raise ValueError("Couldn't match date strings in filename")

out_file = file_template

# make output folder if necessary
# the first component of the input directory is replaced by the output root
out_dir, fname = os.path.split(out_file)
out_dir = f"ALP-3_{freq}_means/{out_dir.split('/', 1)[1]}"
os.makedirs(out_dir, exist_ok=True)

out_path = os.path.join(out_dir, fname)

In [15]:
# save
# report the destination, then write the averaged cube there
print(f'Saving to: {out_path}')
iris.save(means, out_path)

Saving to: ALP-3_day_means/ICTP-RegCM/ALP-3/HadGEM/historical/r1i1p1/ICTP-RegCM4-7-0/v0/day/pr/pr_ALP-3_HadGEM_historical_r1i1p1_ICTP-RegCM4-7-0_v0_day_19950401-19950430.nc


In [13]:
# sanity check: show the input file that was loaded and a summary of the averaged cube
print(files[0])
print(means)


_cp-rcm/ICTP-RegCM/ALP-3/HadGEM/historical/r1i1p1/ICTP-RegCM4-7-0/v0/1hr/pr/pr_ALP-3_HadGEM_historical_r1i1p1_ICTP-RegCM4-7-0_v0_1hr_1995040100-1995050100.nc
precipitation_flux / (kg m-2 s-1)   (time: 30; projection_y_coordinate: 572; projection_x_coordinate: 602)
     Dimension coordinates:
          time                           x                            -                             -
          projection_y_coordinate        -                            x                             -
          projection_x_coordinate        -                            -                             x
     Auxiliary coordinates:
          latitude                       -                            x                             x
          longitude                      -                            x                             x
     Attributes:
          CDI: Climate Data Interface version 1.9.8rc1 (http://mpimet.mpg.de/cdi)
          CDO: Climate Data Operators version 1.9.8rc1 (http://mpimet.