In [None]:
import sys

# Put the local OCGIS source checkout on the import path before importing it.
sys.path.append('/home/benkoziol/l/ocgis/src')
import ocgis

# The walkthrough is pinned to one specific development release.
assert ocgis.__release__ == '1.2.0.dev1'

Configure some environment variables to point to the head directory containing climate data files used in the demo as well as the output directory.

In [None]:
import tempfile

# Head directory containing the climate data files used in the demo.
ocgis.env.DIR_DATA = '/home/benkoziol/data'
# Outputs go to a freshly created temporary directory.
ocgis.env.DIR_OUTPUT = tempfile.mkdtemp()
# Call form of print: consistent with the other cells and valid on both
# Python 2 and Python 3 for a single argument.
print(ocgis.env.DIR_OUTPUT)

Inspect a target file's metadata.

In [None]:
# The target file is given by name only (no directory component).
uri = 'tas_day_CanCM4_decadal2011_r2i1p1_20120101-20211231.nc'
variable = 'tas'
rd = ocgis.RequestDataset(uri=uri, variable=variable)
# Display the metadata of the requested file.
rd.inspect()

Subset a target file by the boundary of California using an intersects GIS operation (the default), and write the data to an ESRI Shapefile. Select the first time coordinate only.

In [None]:
# State boundaries shapefile supplying the subset geometry.
geom = '/home/benkoziol/l/project/ocg/bin/shp/state_boundaries/state_boundaries.shp'
# UID 25 is the California boundary according to the accompanying text.
# NOTE(review): `snippet=True` is presumably what restricts the request to
# the first time coordinate - confirm against the OCGIS documentation.
ops = ocgis.OcgOperations(
    dataset=rd,
    geom=geom,
    geom_select_uid=[25],
    snippet=True,
    output_format='shp',
    prefix='ca',
)
ops.execute()

Also write the model grid to a shapefile.

In [None]:
# No `geom` argument is passed here, unlike the California subset.
ocgis.OcgOperations(
    dataset=rd,
    output_format='shp',
    snippet=True,
    prefix='grid',
).execute()

Spatially average the grid cells clipped to the boundary of California for all the June, July, and August months in the target dataset. Write the output data to CSV.

In [None]:
import webbrowser

# Limit the request to the months June, July and August.
rd = ocgis.RequestDataset(
    uri=uri,
    variable=variable,
    time_region={'month': [6, 7, 8]},
)
# Clip to geometry UID 25 and aggregate, writing the result as CSV.
ops = ocgis.OcgOperations(
    dataset=rd,
    geom=geom,
    geom_select_uid=[25],
    spatial_operation='clip',
    output_format='csv',
    prefix='ca_spatial_average',
    aggregate=True,
)
ret = ops.execute()
print(ret)
# Hand the returned value to the system's default opener.
webbrowser.open(ret)

Perform a difference calculation between two variables using a string function. Inspect the metadata of the output NetCDF file.

In [None]:
# Daily maximum and minimum temperature inputs for the difference calculation.
rd1 = ocgis.RequestDataset(uri='tasmax_day_CanCM4_decadal2010_r2i1p1_20110101-20201231.nc',
                           variable='tasmax')
rd2 = ocgis.RequestDataset(uri='tasmin_day_CanCM4_decadal2010_r2i1p1_20110101-20201231.nc',
                           variable='tasmin')
# String function: the output variable `diff` is `tasmax` minus `tasmin`.
calc = 'diff=tasmax-tasmin'
# `geom_select_uid` is the keyword the other cells use to pick a geometry
# by UID; the previous `select_ugid` spelling did not match them.
ops = ocgis.OcgOperations(dataset=[rd1, rd2], calc=calc, output_format='nc', geom='state_boundaries',
                          geom_select_uid=[25], prefix='diff')
ret = ops.execute()
# Inspect the metadata of the NetCDF output.
print(ocgis.Inspect(ret))

Calculate a sequence of statistics to produce a July time series conforming the target units from Kelvin to Celsius. Perform the calculations on the spatially averaged data for California.

In [None]:
import webbrowser

# July only, with the target units conformed to Celsius.
rd = ocgis.RequestDataset(
    uri=uri,
    variable=variable,
    time_region={'month': [7]},
    conform_units_to='celsius',
    name='calcs',
)
# Each entry pairs a function key with the name given to its output.
calc = [
    {'func': 'mean', 'name': 'mean'},
    {'func': 'std', 'name': 'stdev'},
    {'func': 'min', 'name': 'min'},
    {'func': 'max', 'name': 'max'},
    {'func': 'median', 'name': 'median'},
    {'func': 'freq_perc', 'name': 'fp_95', 'kwds': {'percentile': 95.0}},
    {'func': 'freq_perc', 'name': 'fp_5', 'kwds': {'percentile': 5.0}},
]
calc_grouping = ['month', 'year']
# NOTE(review): the accompanying text mentions California only, yet UIDs 25
# and 26 are both selected here - confirm this is intended.
ops = ocgis.OcgOperations(
    dataset=rd,
    geom='state_boundaries',
    geom_select_uid=[25, 26],
    spatial_operation='clip',
    output_format='csv',
    prefix='ca_calcs',
    aggregate=True,
    calc=calc,
    calc_grouping=calc_grouping,
)
ret = ops.execute()
print(ret)
webbrowser.open(ret)

Perform the same operation returning the data as a "collection". Print the derived variable aliases.

In [None]:
# Re-run the existing operations object with the 'numpy' output format,
# which the accompanying text describes as returning a "collection".
ops.output_format = 'numpy'
ret = ops.execute()
print(ret)
# 25 is one of the selected geometry UIDs; `rd.name` is the request
# dataset's name. Together they reach one entry's variables.
print(ret[25][rd.name].variables.keys())

Variable values are always stored as five dimensions: realization, time, level, row, column.

In [None]:
# Print the 'mean' variable object, then the shape of its value array.
print(ret[25][rd.name].variables['mean'])
print(ret[25][rd.name].variables['mean'].value.shape)

Print some time values from the temporal dimension.

In [None]:
# Print the temporal dimension's values and bounds.
# NOTE(review): presumably datetime objects, judging by the attribute names - confirm.
print(ret[25][rd.name].temporal.value_datetime)
print(ret[25][rd.name].temporal.bounds_datetime)

Print example variable values.

In [None]:
# Print the 'mean' values after squeeze().
# NOTE(review): presumably a NumPy array, so squeeze() drops length-one dimensions - confirm.
print(ret[25][rd.name].variables['mean'].value.squeeze())

Geometries are stored as Shapely objects with associated attributes.

In [None]:
# Type of the geometry stored for UID 25.
print(type(ret.geoms[25]))
# `.bounds` must be read inside the call. `print(x).bounds` only works as a
# Python 2 print statement; with print() as a function it prints the
# geometry and then fails on `None.bounds`.
print(ret.geoms[25].bounds)
# Attribute field names, then the attribute record itself, for UID 25.
print(ret.properties[25].dtype.names)
print(ret.properties[25])

Read a data file with a coordinate system, and inspect its properties. Also take a closer look at some field properties.

In [None]:
uri = 'tas_RCM3_ncep_1981010103.nc'
# No variable is named: it can be auto-discovered when reasonable CF
# metadata is present.
rd = ocgis.RequestDataset(uri=uri)
field = rd.get()
# The variable's values have not been loaded at this point.
assert field.variables['tas']._value is None
# Slicing/subsetting the field does not load the values either.
sub = field[:, 0:10, :, 5, 8]
assert sub.variables['tas']._value is None
# The "source index" is sliced along with the field and used for on-demand
# loading.
print(sub.temporal._src_idx)

Take a closer look at the coordinate system object.

In [None]:
# The coordinate system objects are refined versions of PROJ.4 dictionaries
# and OSR spatial reference objects. Print the object's type, its `value`,
# and the PROJ.4 string exported from its `sr` attribute.
print(type(field.spatial.crs))
print(field.spatial.crs.value)
print(field.spatial.crs.sr.ExportToProj4())

For three variables, calculate monthly averages for the year 2014 for each U.S. state boundary.

In [None]:
# Three request datasets; no variable names are given for any of them.
rd1 = ocgis.RequestDataset(
    uri='tasmax_day_CanCM4_decadal2010_r2i1p1_20110101-20201231.nc'
)
rd2 = ocgis.RequestDataset(
    uri='tasmin_day_CanCM4_decadal2010_r2i1p1_20110101-20201231.nc'
)
rd3 = ocgis.RequestDataset(
    uri='tas_day_CanCM4_decadal2010_r2i1p1_20110101-20201231.nc'
)
# A single mean, grouped by month.
calc = [{'func': 'mean', 'name': 'mean'}]
calc_grouping = ['month']
# Time region, units and output headers are set on the operations object
# rather than on each request dataset.
ops = ocgis.OcgOperations(
    dataset=[rd1, rd2, rd3],
    geom='state_boundaries',
    aggregate=True,
    output_format='shp',
    spatial_operation='clip',
    prefix='temps',
    calc=calc,
    calc_grouping=calc_grouping,
    time_region={'year': [2014]},
    headers=['value', 'calc_alias', 'year', 'month', 'alias'],
    conform_units_to='fahrenheit',
)
ret = ops.execute()
print(ret)

Use ESMF regridding with a subset and spatial aggregation, writing the data to shapefile.

In [None]:
# Source data to regrid and the dataset supplying the destination grid.
rd_src = ocgis.RequestDataset(uri='tas_day_CanCM4_decadal2010_r2i1p1_20110101-20201231.nc',
                              variable='tas')
rd_dest = ocgis.RequestDataset(uri='nldas_met_update.obs.daily.pr.1991.nc')
# Print the grid resolution of the source, then of the destination.
# Call form of print: consistent with the other cells and valid on both
# Python 2 and Python 3 for a single argument.
print(rd_src.get().spatial.grid.resolution)
print(rd_dest.get().spatial.grid.resolution)

# NOTE(review): `with_corners` presumably controls whether grid cell corners
# are used during regridding - confirm against the OCGIS documentation.
regrid_options = {'with_corners': False}
ops = ocgis.OcgOperations(dataset=rd_src, regrid_destination=rd_dest, geom_select_uid=[6, 16],
                          agg_selection=True, geom='state_boundaries', snippet=True,
                          output_format='shp', prefix='regrid', regrid_options=regrid_options)
print(ops.execute())

Shapefiles may also be used as fields along with ESMPy fields (almost)!

In [None]:
# A shapefile path is passed as the request dataset URI.
rd = ocgis.RequestDataset(
    uri='/home/benkoziol/l/project/ocg/bin/shp/state_boundaries/state_boundaries.shp'
)
sfield = rd.get()
# List the variable names, then print the values of 'STATE_NAME'.
print(sfield.variables.keys())
print(sfield.variables['STATE_NAME'].value.squeeze())