In [9]:
import iris
# Opt in to two of Iris' FUTURE netCDF behaviours before any data is loaded or saved.
# NOTE(review): the exact effect of each flag depends on the installed Iris version — confirm against its docs.
iris.FUTURE.netcdf_promote = True
iris.FUTURE.netcdf_no_unlimited = True

In [12]:
from distributed import Client
# Connect to a dask.distributed scheduler at a hard-coded address.
# NOTE(review): the address is environment specific — confirm it is still valid before re-running.
c = Client('172.31.20.215:8786')
c

<Client: scheduler="172.31.20.215:8786" processes=20 cores=40>

In [13]:
# Pieces of a single model-run identifier: month, day and run of 2016.
month, day, run = '01', '01', '06'
# Key prefix shared by every file belonging to that run.
prefix = 'prods_op_mogreps-g_2016{}{}_{}'.format(month, day, run)
prefix

'prods_op_mogreps-g_20160101_06'

In [14]:
from boto.s3.connection import S3Connection
import os

# NOTE(review): presumably tells boto to sign S3 requests with AWS Signature Version 4 — confirm against the boto docs.
os.environ['S3_USE_SIGV4'] = 'True'

In [15]:
import re

In [18]:
import iris
import os

from iris.cube import CubeList
from dask import delayed
import dask.bag as db
import boto
import glob
import numpy as np
from itertools import groupby
from collections import defaultdict
from tempfile import NamedTemporaryFile

def load_cubes(address):
    """Load the raw (unmerged) cubes at ``address`` with normalised metadata.

    Args:
        address: file name, glob pattern or list of them, passed straight to
            ``iris.load_raw``.

    Returns:
        Whatever ``iris.load_raw`` returns for ``address`` after the
        ``metadata`` callback has been applied to each loaded cube.
    """
    def metadata(cube, field, filename):
        """Load callback that normalises one cube in place."""
        # Cubes without a realization coordinate get one taken from the
        # second-to-last '_'-separated token of the file name.
        if not cube.coords('realization'):
            realization = np.array([filename.split('_')[-2]], dtype='int32')
            realization_coord = iris.coords.AuxCoord(realization, standard_name='realization', var_name='realization')
            cube.add_aux_coord(realization_coord)
        # the '0' member has no permutations and is labelled differently
        # for the sake of easier manipulation we're going to normalise the labels.
        cube.coord('time').var_name = 'time'
        cube.coord('forecast_period').var_name = 'forecast_period'
        # Strip a trailing '_<digit>' suffix from the variable name. The
        # pattern is a raw string ('\d' is an invalid escape in a plain
        # literal) and a missing var_name is skipped instead of raising.
        if cube.var_name and re.search(r'_\d$', cube.var_name):
            cube.var_name = cube.var_name[:-2]
        # Replace an auxiliary forecast_period coordinate with a DimCoord.
        forecast_period = cube.coord('forecast_period')
        if isinstance(forecast_period, iris.coords.AuxCoord):
            cube.replace_coord(iris.coords.DimCoord.from_coord(forecast_period))

    return iris.load_raw(address, callback=metadata)


def download(local_path, key):
    """Fetch ``key`` into ``local_path`` and return the path of the local copy.

    The destination is ``local_path`` immediately followed by ``key.name``
    (no separator is inserted), and the contents are written with
    ``key.get_contents_to_filename``.
    """
    destination = '{}{}'.format(local_path, key.name)
    key.get_contents_to_filename(destination)
    return destination

def wrangle_cubes(cubes):
    """Group cubes by name and merge each group.

    Args:
        cubes: iterable of iris cubes (for example the result of
            ``load_cubes``).

    Returns:
        dict mapping each cube name to the ``CubeList`` produced by merging
        the cubes of that name. Names appear in order of first occurrence.
    """
    groups = defaultdict(list)
    for cube in cubes:
        groups[cube.name()].append(cube)

    def process(cubes):
        """Merge one same-name group, splitting lone higher-rank cubes first."""
        shapes = defaultdict(list)
        for cube in cubes:
            shapes[cube.shape].append(cube)
        for shape, same_shape in shapes.items():
            # Only a cube that is alone in its shape, and whose trailing
            # dimensions match another shape present in the group, is split.
            # The `not shape` guard skips 0-d cubes, which have no leading
            # dimension to slice over.
            if len(same_shape) != 1 or not shape or shape[1:] not in shapes:
                continue
            lone_cube = same_shape[0]
            # Locate by identity: list.index() would compare with ==, which
            # for cubes is a full equality check rather than "this object".
            position = next(i for i, candidate in enumerate(cubes) if candidate is lone_cube)
            cubes.pop(position)
            # Slices along the leading dimension go to the end of the list so
            # merge() can combine them with the lower-rank cubes.
            cubes.extend(lone_cube.slices_over(0))
        return iris.cube.CubeList(cubes).merge()

    return {name: process(group) for name, group in groups.items()}

def write_and_upload(results_bucket, param, cubes, model, run_date, run_time):
    """Save ``cubes`` to a temporary netCDF file and upload it to S3.

    Args:
        results_bucket: name of the destination S3 bucket.
        param: parameter name; used as the key's folder and file-name suffix.
        cubes: cube(s) to save, passed to ``iris.fileformats.netcdf.save``.
        model: model name embedded in the key.
        run_date: run date string embedded in the key.
        run_time: run time string embedded in the key.

    The object key is ``<param>/<run_date>_<run_time>_<model>_<param>.nc``.
    """
    # e.g. temperature/<date>_<time>_<mogreps-g>_<param>.nc
    key_name = '{}/{}_{}_{}_{}.nc'.format(param, run_date, run_time, model, param)

    # The context manager removes the temporary file even if saving or
    # uploading raises, so failed runs do not leave files behind.
    with NamedTemporaryFile() as tmp:
        iris.fileformats.netcdf.save(cubes, tmp.name, netcdf_format="NETCDF4", zlib=True, complevel=1)

        conn = S3Connection(host='s3.eu-west-2.amazonaws.com')
        bucket = conn.get_bucket(results_bucket)
        key = boto.s3.key.Key(bucket)
        key.key = key_name
        key.set_contents_from_filename(tmp.name)


@delayed
def process_model_run(bucket, model, run_date, run_time, results_bucket='mogreps-g-sample2'):
    """Download, merge and re-upload every file of one model run.

    Args:
        bucket: name of the S3 bucket holding the source files.
        model: model name, used in the key prefix and the output key.
        run_date: run date string, used in the key prefix and the output key.
        run_time: run time string, used in the key prefix and the output key.
        results_bucket: name of the S3 bucket that receives the merged files;
            defaults to the bucket that was previously hard-coded.

    Returns:
        True once every parameter has been uploaded.
    """
    prefix = 'prods_op_{}_{}_{}'.format(model, run_date, run_time)
    local_path = '/tmp/{}/'.format(prefix)
    nc_pattern = '{}*.nc'.format(local_path)

    def remove_downloads():
        # Delete every .nc file in this run's working directory.
        for nc_f in glob.glob(nc_pattern):
            os.remove(nc_f)

    try:
        os.mkdir(local_path)
    except FileExistsError:
        # The directory is left over from an earlier run: clear stale files
        # so they are not picked up by the load below.
        remove_downloads()

    try:
        conn = S3Connection(host='s3.eu-west-2.amazonaws.com')
        source_bucket = conn.get_bucket(bucket)
        for key in source_bucket.list(prefix=prefix):
            download(local_path, key)
        cubes = load_cubes('{}*'.format(local_path))
        for param, param_cubes in wrangle_cubes(cubes).items():
            write_and_upload(results_bucket, param, param_cubes, model, run_date, run_time)
    finally:
        # Always free the local disk, including when a step above fails.
        remove_downloads()
    return True


One Model

In [19]:
from itertools import product

# Each (date, time) pairing below identifies one model run to process.
run_dates = ['20160101', '20160102', '20160103', '20160104', '20160105']
run_times = ['00', '06', '12', '18']
bucket = 'mogreps-g'
model = 'mogreps-g'

combos = product(run_dates, run_times)
# Only the delayed tasks are built here; nothing runs until they are computed.
computations = [
    process_model_run(bucket, model, run_date, run_time)
    for run_date, run_time in combos
]
print(computations)

[Delayed('process_model_run-ba5795b0-621b-43f8-bdf4-a43c1dff83c4'), Delayed('process_model_run-a69758e3-4589-48ab-9981-6f767389ea21'), Delayed('process_model_run-a37002f3-6e59-434f-a196-1d283c2c9cd0'), Delayed('process_model_run-7eb87285-6997-4cc2-b734-889642a26d71'), Delayed('process_model_run-8dc83b4f-3136-46ff-be72-3021b07f5f61'), Delayed('process_model_run-2ea958ab-a901-4593-95b0-387eae41f404'), Delayed('process_model_run-53a73481-f1a0-4cdf-a570-ba8664fad122'), Delayed('process_model_run-67688529-3d16-4b2a-9fe6-7d923af3e8db'), Delayed('process_model_run-c1920084-fdb0-4115-b3d3-dacaa13b6735'), Delayed('process_model_run-83a626ea-2eb7-4e36-abb9-73cdbb22f77d'), Delayed('process_model_run-128fa514-b007-4393-8d94-7ac188749b98'), Delayed('process_model_run-a399bb71-f884-44d4-8e7c-e8e135d0f2b2'), Delayed('process_model_run-3b058099-cdb1-4ef6-a5b2-4397eae82496'), Delayed('process_model_run-3dedf945-d23f-4a2c-8d54-5d3b4da44c52'), Delayed('process_model_run-dd53d1c9-37c2-4065-a3ee-e917b96483

In [21]:
# Submit every delayed run to the cluster; the call hands back futures straight away rather than waiting for results.
c.compute(computations)

[<Future: status: pending, key: finalize-a931923d3a676cae43943e6528cef760>,
 <Future: status: pending, key: finalize-31ca94c7609597f3323f7e596a77c2ba>,
 <Future: status: pending, key: finalize-fc4aa614721e5f517ffcaa8fd6af7fa2>,
 <Future: status: pending, key: finalize-c33fa4ca43977247a13c0ef2e4c16e53>,
 <Future: status: pending, key: finalize-32816cd3cc82a68b36b576bea0da9770>,
 <Future: status: pending, key: finalize-eb8b55080fcbd1abefc3da1048752ce3>,
 <Future: status: pending, key: finalize-5a94071cbed3d1d7d7f65e64b3ce260e>,
 <Future: status: pending, key: finalize-770cf230180c9ea0924d6431ee8a3444>,
 <Future: status: pending, key: finalize-b6406773f97bbb5b6b7148c41338e69c>,
 <Future: status: pending, key: finalize-c3373da50b5e41f474621d22c3d70dde>,
 <Future: status: pending, key: finalize-0bcc9a022472441210f3518d4e140500>,
 <Future: status: pending, key: finalize-82f11e116c8847adf7f6ce6fa706d853>,
 <Future: status: pending, key: finalize-e55cd7258217cb7bb8c7c141208563a9>,
 <Future: st

Many models

In [None]:
# Build (but do not yet execute) the delayed task for one single model run.
bucket = 'mogreps-g'
model = 'mogreps-g'
run_date = '20160101'
run_time = '06'
x = process_model_run(bucket, model, run_date, run_time)

In [None]:
%time
x.compute()

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 5.25 µs


In [102]:
# NOTE(review): `cubes` is not assigned at top level by any cell shown here — it appears to be leftover kernel state
# (presumably a dict like the one wrangle_cubes returns); define it explicitly so the notebook survives Restart & Run All.
cubes.keys()

dict_keys(['high_type_cloud_area_fraction', 'wet_bulb_potential_temperature', 'wet_bulb_freezing_level_altitude', 'x_wind', 'visibility_in_air', 'air_pressure_at_sea_level', 'medium_type_cloud_area_fraction', 'geopotential_height', 'surface_downward_northward_stress', 'y_wind', 'low_type_cloud_area_fraction', 'wind_speed_of_gust', 'stratiform_rainfall_amount', 'stratiform_snowfall_amount', 'air_temperature', 'relative_humidity', 'surface_downward_eastward_stress', 'dew_point_temperature', 'fog_area_fraction'])

In [106]:
# Dump each parameter name followed by its cube summary and a blank-line gap.
for key, value in cubes.items():
    print(key, value, '\n', sep='\n')

high_type_cloud_area_fraction
0: high_type_cloud_area_fraction / (1) (realization: 12; time: 58; latitude: 600; longitude: 800)


wet_bulb_potential_temperature
0: wet_bulb_potential_temperature / (K) (realization: 12; time: 59; pressure: 3; latitude: 600; longitude: 800)


wet_bulb_freezing_level_altitude
0: wet_bulb_freezing_level_altitude / (m) (realization: 12; time: 58; latitude: 600; longitude: 800)


x_wind
0: x_wind / (m s-1)                    (realization: 12; time: 58; latitude: 601; longitude: 800)
1: x_wind / (m s-1)                    (realization: 12; time: 15; pressure: 16; latitude: 601; longitude: 800)
2: x_wind / (m s-1)                    (realization: 12; time: 59; latitude: 600; longitude: 800)
3: x_wind / (m s-1)                    (realization: 12; time: 44; pressure: 9; latitude: 601; longitude: 800)


visibility_in_air
0: visibility_in_air / (m)             (realization: 12; time: 58; latitude: 600; longitude: 800)


air_pressure_at_sea_level
0: air_pressure_a

In [117]:
# Count the slices obtained by iterating the first fog cube over realization and time.
print(sum(1 for _ in cubes['fog_area_fraction'][0].slices_over(['realization', 'time'])))

696
