In [22]:
import iris

In [23]:
from distributed import Client

# Connect to the dask.distributed scheduler at a fixed address.
# NOTE(review): the scheduler address is hard-coded; it must be reachable
# for the rest of the notebook to run.
c = Client('172.31.18.5:8786')
# Bare expression so the notebook displays the client summary.
c

<Client: scheduler="172.31.18.5:8786" processes=2 cores=2>

Finding available files

In [24]:
from boto.s3.connection import S3Connection
import os

# Set before any S3 connection is created below.
# NOTE(review): presumably this switches boto to AWS Signature Version 4
# request signing -- confirm against the boto documentation.
os.environ['S3_USE_SIGV4'] = 'True'

def list_files(bucket, limit=10, prefix='prods'):
    """Return local file paths for the first few keys of an S3 bucket.

    Connects to the ``s3.eu-west-2.amazonaws.com`` endpoint, lists the keys
    in ``bucket`` whose names start with ``prefix``, and maps each key name
    onto the local data directory.

    Parameters
    ----------
    bucket : str
        Name of the S3 bucket to list.
    limit : int, optional
        Maximum number of keys to return. Defaults to 10, the value that
        was previously hard-coded.
    prefix : str, optional
        Only keys starting with this prefix are listed. Defaults to
        ``'prods'``, the value that was previously hard-coded.

    Returns
    -------
    list of str
        At most ``limit`` paths, in listing order. Shorter (possibly empty)
        when the bucket holds fewer matching keys.
    """
    from itertools import islice

    conn = S3Connection(host='s3.eu-west-2.amazonaws.com')
    # Keep the bucket object under its own name rather than rebinding the
    # ``bucket`` argument, which is the bucket *name*.
    s3_bucket = conn.get_bucket(bucket)
    # islice simply stops when the listing runs out; calling next() a fixed
    # number of times raised StopIteration on listings shorter than that.
    keys = islice(s3_bucket.list(prefix=prefix), limit)
    # NOTE(review): the local directory is fixed to 'mogreps-g' regardless
    # of which bucket name was passed in.
    return ['/usr/local/share/notebooks/data/mogreps-g/{}'.format(k.key) for k in keys]


# List the 'mogreps-g' bucket and keep the resulting local paths; the load
# step further down iterates over `in_files`.
in_files = list_files('mogreps-g')
# Sanity check: how many paths came back, followed by a preview of them.
print(len(in_files))
in_files[:10]

10


['/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_003.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_006.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_009.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_012.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_015.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_018.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_021.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_024.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_027.nc',
 '/usr/local/share/notebooks/data/mogreps-g/prods_op_mogreps-g_20160101_00_00_030.nc']

In [25]:
# Create a dask bag (`db` is the alias for the dask.bag module).
# What we end up with is a list of instructions to run the 'load_cubes' function on each input file.
from iris.cube import CubeList
from dask import delayed
import dask.bag as db

@delayed
def load_cubes(address):
    """Load every cube found at ``address`` with iris, as a delayed task.

    While loading, any cube that has no 'realization' coordinate is given
    one. Its value is the integer in the second-to-last underscore-separated
    field of the file name, e.g. the ``00`` just before ``003.nc`` in
    ``prods_op_mogreps-g_20160101_00_00_003.nc``.

    Returns whatever ``iris.load`` returns for ``address``.
    """
    def add_realization(cube, field, filename):
        # Load callback: cubes that already carry a realization are left
        # untouched.
        if cube.coords('realization'):
            return
        member = int(filename.rsplit('_', 2)[-2])
        cube.add_aux_coord(
            iris.coords.AuxCoord(member, standard_name='realization')
        )

    return iris.load(address, callback=add_realization)

# Call load_cubes once per input path and gather the resulting delayed
# tasks into a single dask bag.
delayed_cubes = db.from_delayed(list(map(load_cubes, in_files)))
# Bare expression so the notebook shows the bag's summary.
delayed_cubes

dask.bag<bag-fro..., npartitions=10>

In [26]:
# Fetch the first 10 items of the bag to inspect what the files contain.
delayed_cubes.take(10)

(<iris 'Cube' of wet_bulb_potential_temperature / (K) (time: 2; pressure: 3; latitude: 600; longitude: 800)>,
 <iris 'Cube' of wet_bulb_freezing_level_altitude / (m) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_pressure_at_sea_level / (Pa) (time: 2; latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_temperature / (K) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_temperature / (K) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_temperature / (K) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_temperature / (K) (time: 2; pressure: 16; latitude: 600; longitude: 800)>,
 <iris 'Cube' of dew_point_temperature / (K) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of fog_area_fraction / (1) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of geopotential_height / (m) (time: 2; pressure: 9; latitude: 600; longitude: 800)>)

In [20]:
# Hand the bag to the cluster client `c` to persist; the result is kept as
# `cubes` and displayed.
# NOTE(review): presumably this starts the loads on the workers and keeps
# the results in their memory -- confirm against the distributed docs.
cubes = c.persist(delayed_cubes)
cubes

dask.bag<bag-fro..., npartitions=10>

In [21]:
# Preview the first 10 items of the persisted bag.
cubes.take(10)

(<iris 'Cube' of wet_bulb_freezing_level_altitude / (m) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of wet_bulb_potential_temperature / (K) (time: 2; pressure: 3; latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_pressure_at_sea_level / (Pa) (time: 2; latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_temperature / (K) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_temperature / (K) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_temperature / (K) (time: 2; pressure: 16; latitude: 600; longitude: 800)>,
 <iris 'Cube' of air_temperature / (K) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of dew_point_temperature / (K) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of fog_area_fraction / (1) (latitude: 600; longitude: 800)>,
 <iris 'Cube' of geopotential_height / (m) (time: 2; pressure: 9; latitude: 600; longitude: 800)>)

In [13]:
# Register the persisted bag with the scheduler under the name 'mogreps'.
# NOTE(review): presumably so that other clients connected to the same
# scheduler can retrieve it by name -- confirm against the distributed docs.
c.publish_dataset(mogreps=cubes)