diff --git a/gaia/__init__.py b/gaia/__init__.py
index 68c09bc..deb925a 100644
--- a/gaia/__init__.py
+++ b/gaia/__init__.py
@@ -51,6 +51,16 @@ class GaiaException(Exception):
     pass
 
 
+def create(*args, **kwargs):
+    """
+    Convenience function providing a simpler API for creating a
+    GaiaDataObject
+    """
+    from gaia.io import readers
+    reader = readers.GaiaReader(*args, **kwargs)
+    return reader.read()
+
+
 def get_abspath(inpath):
     """
     Get absolute path of a path string
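# [Editor's note] A usage sketch for the new create() convenience function;
# illustrative only, not part of the patch, and the file path is hypothetical.
import gaia

# create() instantiates a GaiaReader for the given URI and returns a
# GaiaDataObject; metadata and data are loaded lazily on first access.
dataset = gaia.create('/data/parcels.geojson')
print(dataset.get_epsg())   # projection code, e.g. 4326
df = dataset.get_data()     # e.g. a GeoDataFrame for vector input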
diff --git a/gaia/gaia_data.py b/gaia/gaia_data.py
new file mode 100644
index 0000000..8dadf30
--- /dev/null
+++ b/gaia/gaia_data.py
@@ -0,0 +1,314 @@
+from __future__ import absolute_import, division, print_function
+from builtins import (
+    bytes, str, open, super, range, zip, round, input, int, pow, object
+)
+
+from sqlalchemy import create_engine, MetaData, Table, text
+from geoalchemy2 import Geometry
+import fiona
+import geopandas
+try:
+    import osr
+except ImportError:
+    from osgeo import osr
+
+from gaia import GaiaException
+from gaia.filters import filter_postgis
+from gaia.geo.gdal_functions import gdal_reproject
+from gaia.util import sqlengines
+
+
+class GaiaDataObject(object):
+    def __init__(self, reader=None, dataFormat=None, epsg=None, **kwargs):
+        self._data = None
+        self._metadata = None
+        self._reader = reader
+        self._datatype = None
+        self._dataformat = dataFormat
+        self._epsg = epsg
+
+    def get_metadata(self):
+        if not self._metadata:
+            self._reader.load_metadata(self)
+        return self._metadata
+
+    def set_metadata(self, metadata):
+        self._metadata = metadata
+
+    def get_data(self):
+        if self._data is None:
+            self._reader.load_data(self)
+        return self._data
+
+    def set_data(self, data):
+        self._data = data
+
+    def get_epsg(self):
+        return self._epsg
+
+    def reproject(self, epsg):
+        repro = geopandas.GeoDataFrame.copy(self._data)
+        repro[repro.geometry.name] = repro.geometry.to_crs(epsg=epsg)
+        repro.crs = fiona.crs.from_epsg(epsg)
+        self._data = repro
+
+    def _getdatatype(self):
+        if not self._datatype:
+            self.get_metadata()
+
+        return self._datatype
+
+    def _setdatatype(self, value):
+        self._datatype = value
+
+    datatype = property(_getdatatype, _setdatatype)
+
+    def _getdataformat(self):
+        if not self._dataformat:
+            self.get_metadata()
+
+        return self._dataformat
+
+    def _setdataformat(self, value):
+        self._dataformat = value
+
+    dataformat = property(_getdataformat, _setdataformat)
+
+
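# [Editor's note] GaiaDataObject delegates all I/O to its reader:
# get_metadata() and get_data() call reader.load_metadata(self) and
# reader.load_data(self) the first time they are used. A hypothetical
# minimal reader satisfying that contract (illustrative only, not part
# of the patch; the type/format labels are assumptions):
import geopandas


class InMemoryGeoJsonReader(object):
    def __init__(self, uri):
        self.uri = uri

    def load_metadata(self, data_object):
        # Populate the object; datatype/dataformat use the property setters
        data_object.set_metadata({'uri': self.uri})
        data_object.datatype = 'vector'
        data_object.dataformat = 'pandas'

    def load_data(self, data_object):
        data_object.set_data(geopandas.read_file(self.uri))

    def read(self):
        # Mirrors GaiaReader.read() as used by gaia.create() above
        return GaiaDataObject(reader=self)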
+class GDALDataObject(GaiaDataObject):
+    def __init__(self, reader=None, **kwargs):
+        super(GDALDataObject, self).__init__(**kwargs)
+        self._reader = reader
+        self._epsgComputed = False
+
+    def get_epsg(self):
+        if not self._epsgComputed:
+            if not self._data:
+                self.get_data()
+
+            projection = self._data.GetProjection()
+            data_crs = osr.SpatialReference(wkt=projection)
+
+            try:
+                self.epsg = int(data_crs.GetAttrValue('AUTHORITY', 1))
+                self._epsgComputed = True
+            except (TypeError, ValueError):
+                raise GaiaException("EPSG code could not be determined")
+
+        return self.epsg
+
+    def reproject(self, epsg):
+        self._data = gdal_reproject(self._data, '', epsg=epsg)
+        self.epsg = epsg
+
+
+class PostgisDataObject(GaiaDataObject):
+    def __init__(self, reader=None, **kwargs):
+        super(PostgisDataObject, self).__init__(**kwargs)
+
+        self._reader = reader
+
+        self._table = None
+        self._hostname = None
+        self._dbname = None
+        self._user = None
+        self._password = None
+        self._columns = []
+        self._filters = None
+        self._geom_column = 'the_geom'
+        self._epsg = None
+        self._meta = None
+        self._table_obj = None
+
+    # Define table property
+    def _settable(self, table):
+        self._table = table
+
+    def _gettable(self):
+        return self._table
+
+    table = property(_gettable, _settable)
+
+    # Define hostname property
+    def _sethostname(self, hostname):
+        self._hostname = hostname
+
+    def _gethostname(self):
+        return self._hostname
+
+    hostname = property(_gethostname, _sethostname)
+
+    # Define db property
+    def _setdbname(self, dbname):
+        self._dbname = dbname
+
+    def _getdbname(self):
+        return self._dbname
+
+    dbname = property(_getdbname, _setdbname)
+
+    # Define user property
+    def _setuser(self, user):
+        self._user = user
+
+    def _getuser(self):
+        return self._user
+
+    user = property(_getuser, _setuser)
+
+    # Define password property
+    def _setpassword(self, password):
+        self._password = password
+
+    def _getpassword(self):
+        return self._password
+
+    password = property(_getpassword, _setpassword)
+
+    # Define epsg property
+    def _setepsg(self, epsg):
+        self._epsg = epsg
+
+    def _getepsg(self):
+        return self._epsg
+
+    epsg = property(_getepsg, _setepsg)
+
+    # Define filters property
+    def _setfilters(self, filters):
+        self._filters = filters
+
+    def _getfilters(self):
+        return self._filters
+
+    filters = property(_getfilters, _setfilters)
+
+    # Define geom_column property
+    def _setgeom_column(self, geom_column):
+        self._geom_column = geom_column
+
+    def _getgeom_column(self):
+        return self._geom_column
+
+    geom_column = property(_getgeom_column, _setgeom_column)
+
+    # Define engine property
+    def _setengine(self, engine):
+        self._engine = engine
+
+    def _getengine(self):
+        return self._engine
+
+    engine = property(_getengine, _setengine)
+
+    # etc...
+
+    def initialize_engine(self):
+        self._engine = self.get_engine(self.get_connection_string())
+
+        self.get_table_info()
+        self.verify()
+
+    # Methods carried over from PostgisIO
+
+    def get_engine(self, connection_string):
+        """
+        Create and return a SQLAlchemy engine object
+
+        :param connection_string: Database connection string
+        :return: SQLAlchemy Engine object
+        """
+        if connection_string not in sqlengines:
+            sqlengines[connection_string] = create_engine(connection_string)
+        return sqlengines[connection_string]
+
+    def verify(self):
+        """
+        Make sure that all specified columns exist in the actual table
+        """
+        for col in self._columns:
+            if col not in self._table_obj.columns.keys():
+                raise Exception('{} column not found in {}'.format(
+                    col, self._table_obj))
+
+    def get_connection_string(self):
+        """
+        Get connection string based on host, dbname, username, password
+
+        :return: Postgres connection string for SQLAlchemy
+        """
+        auth = ''
+        if self._user:
+            auth = self._user
+            if self._password:
+                auth = auth + ':' + self._password
+        if auth:
+            auth += '@'
+        conn_string = 'postgresql://{auth}{host}/{dbname}'.format(
+            auth=auth, host=self._hostname, dbname=self._dbname)
+
+        return conn_string
+
+    def get_epsg(self):
+        """
+        Get the EPSG code of the data
+
+        :return: EPSG code
+        """
+        return self._epsg
+
+    def get_table_info(self):
+        """
+        Use SQLAlchemy reflection to gather data on the table, including the
+        geometry column, geometry type, and EPSG code, and assign them to the
+        PostgisDataObject's attributes.
+        """
+        epsg = None
+        meta = MetaData()
+        table_obj = Table(self._table, meta,
+                          autoload=True, autoload_with=self._engine)
+        if not self._columns:
+            self._columns = table_obj.columns.keys()
+        geo_cols = [(col.name, col.type) for col in table_obj.columns
+                    if hasattr(col.type, 'srid')]
+        if geo_cols:
+            geo_col = geo_cols[0]
+            self._geom_column = geo_col[0]
+            geo_obj = geo_col[1]
+            if self._geom_column not in self._columns:
+                self._columns.append(self._geom_column)
+            if hasattr(geo_obj, 'srid'):
+                epsg = geo_obj.srid
+                if epsg == -1:
+                    epsg = 4326
+            if hasattr(geo_obj, 'geometry_type'):
+                self._geometry_type = geo_obj.geometry_type
+
+        self._epsg = epsg
+        self._table_obj = table_obj
+        self._meta = meta
+
+    def get_geometry_type(self):
+        """
+        Get the geometry type of the data
+
+        :return: Geometry type
+        """
+        return self._geometry_type
+
+    def get_query(self):
+        """
+        Formulate a query string and parameter list based on the
+        table name, columns, and filter
+
+        :return: Query string and parameter list
+        """
+        columns = ','.join(['"{}"'.format(x) for x in self._columns])
+        query = 'SELECT {} FROM "{}"'.format(columns, self._table)
+        filter_params = []
+        if self._filters:
+            filter_sql, filter_params = filter_postgis(self._filters)
+            query += ' WHERE {}'.format(filter_sql)
+        query += ';'
+        return str(text(query)), filter_params
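# [Editor's note] An illustration of how PostgisDataObject assembles its
# query; not part of the patch, and the connection details, table and
# column names are hypothetical.
obj = PostgisDataObject()
obj.table = 'roads'
obj.hostname = 'localhost'
obj.dbname = 'gis'
obj.user = 'gaia'
obj.password = 'secret'
# Connects, reflects the table (filling columns, geom_column and epsg),
# and verifies that any requested columns exist.
obj.initialize_engine()
query, params = obj.get_query()
# query  -> 'SELECT "id","name","the_geom" FROM "roads";' (reflected columns)
# params -> []   (parameters appear only when filters are set)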
diff --git a/gaia/gaia_process.py b/gaia/gaia_process.py
deleted file mode 100644
index 93c0e03..0000000
--- a/gaia/gaia_process.py
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-###############################################################################
-# Copyright Kitware Inc. and Epidemico Inc.
-#
-# Licensed under the Apache License, Version 2.0 ( the "License" );
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-###############################################################################
-import os
-import uuid
-from gaia import get_abspath, config, GaiaException, types, formats
-
-
-class GaiaProcess(object):
-    """
-    Abstract class to define a geospatial process
-    """
-
-    # TODO: Enforce required inputs and args
-    required_inputs = []
-    required_args = []
-    optional_args = [
-        {
-            'name': 'parent',
-            'title': 'Parent ID',
-            'description': 'Parent ID (UUID format)',
-            'type': str
-        }
-
-    ]
-    default_output = None
-    args = None
-
-    def __init__(self, inputs=None, output=None, parent=None, **kwargs):
-        self.inputs = inputs
-        self.output = output
-        self.parent = parent
-        self.id = str(uuid.uuid4())
-        for k, v in kwargs.items():
-            setattr(self, k, v)
-        self.validate()
-
-    def test_arg_type(self, arg, arg_type):
-        """
-        Try to cast a process argument to its required type. Raise an
-        exception if not successful.
-        :param arg: The argument property
-        :param arg_type: The required argument type (int, str, etc)
-        """
-        try:
-            arg_type(getattr(self, arg))
-        except Exception:
-            raise GaiaException('Required argument {} must be of type {}'
-                                .format(arg, arg_type))
-
-    def validate(self):
-        """
-        Ensure that all required inputs and arguments are present.
-        """
-        # for input in self.inputs:
-        #     if input.
-        input_types = []
-        errors = []
-
-        for input in self.inputs:
-            type = input.type
-            if type == types.PROCESS:
-                for t in [i for i in dir(types) if not i.startswith("__")]:
-                    if any((True for x in input.default_output if x in getattr(
-                            formats, t, []))):
-                        type = getattr(types, t)
-                        break
-            input_types.append(type)
-
-        for i, req_input in enumerate(self.required_inputs):
-            if i >= len(input_types):
-                errors.append("Not enough inputs for process")
-            elif req_input['type'] != input_types[i]:
-                errors.append("Input #{} is of incorrect type.".format(i+1))
-
-        if len(input_types) > len(self.required_inputs):
-            if (self.required_inputs[-1]['max'] is not None and
-                    len(input_types) > len(self.required_inputs) +
-                    self.required_inputs[-1]['max']-1):
-                errors.append("Incorrect # of inputs; expected {}".format(
-                    len(self.required_inputs)))
-            else:
-                for i in range(len(self.required_inputs)-1, len(input_types)):
-                    if input_types[i] != self.required_inputs[-1]['type']:
-                        errors.append(
-                            "Input #{} is of incorrect type.".format(i + 1))
-        if errors:
-            raise GaiaException('\n'.join(errors))
-        for item in self.required_args:
-            arg, arg_type = item['name'], item['type']
-            if not hasattr(self, arg) or getattr(self, arg) is None:
-                raise GaiaException('Missing required argument {}'.format(arg))
-            self.test_arg_type(arg, arg_type)
-            if 'options' in item and getattr(self, arg) not in item['options']:
-                raise GaiaException('Invalid value for {}'.format(item['name']))
-        for item in self.optional_args:
-            arg, arg_type = item['name'], item['type']
-            if hasattr(self, arg) and getattr(self, arg) is not None:
-                self.test_arg_type(arg, arg_type)
-                argval = getattr(self, arg)
-                if 'options' in item and argval not in item['options']:
-                    raise GaiaException(
-                        'Invalid value for {}'.format(item['name']))
-
-    def compute(self):
-        """
-        Abstract method for running process
-        """
-        raise NotImplementedError()
-
-    def purge(self):
-        """
-        Delete the process output
-        """
-        self.output.delete()
-
-    def get_outpath(self, uri=config['gaia']['output_path']):
-        """
-        Get the output path of the process
-
-        :param uri: base output path
-        :return: Process output path
-        """
-        ids_path = '{}/{}'.format(
-            self.parent, self.id) if self.parent else self.id
-        return get_abspath(
-            os.path.join(uri, ids_path,
-                         '{}{}'.format(self.id, self.default_output[0])))
-
-    def get_input_classes(self):
-        """
-        Get the unique set of input classes
-
-        :return: set of classes
-        """
-        io_classes = set()
-        for input in self.inputs:
-            input_class = input.__class__.__name__
-            if 'Process' not in input_class:
-                io_classes.add(input.__class__.__name__)
-            else:
-                io_classes = io_classes.union(input.process.get_input_classes())
-        return io_classes
diff --git a/gaia/geo/__init__.py b/gaia/geo/__init__.py
index 749a070..fb4ab66 100644
--- a/gaia/geo/__init__.py
+++ b/gaia/geo/__init__.py
@@ -1,4 +1 @@
 from gaia.geo.gdal_functions import *
-from gaia.geo.processes_raster import *
-from gaia.geo.processes_vector import *
-from gaia.geo.geo_inputs import *
diff --git a/gaia/geo/gdal_functions.py b/gaia/geo/gdal_functions.py
index 3cced2c..40c92a0 100644
--- a/gaia/geo/gdal_functions.py
+++ b/gaia/geo/gdal_functions.py
@@ -28,6 +28,7 @@ import rasterio
 import rasterio.features
 
 from gaia import GaiaException
+from gaia.util import get_uri_extension, UnsupportedFormatException
 try:
     import gdalnumeric
 except ImportError:
@@ -36,6 +37,7 @@ import osr
 from PIL import Image, ImageDraw
 from osgeo.gdal_array import BandReadAsArray, BandWriteArray
+import numpy as np
 from numpy.ma.core import MaskedConstant
 
 logger = logging.getLogger('gaia.geo.gdal_functions')
 
@@ -56,6 +58,77 @@
     'Float64': 1.7976931348623158E+308
 }
 
+# Map of file extensions to driver name
+driver_lookup = {
+    'tif': 'GTiff',
+    'tiff': 'GTiff'
+}
+
+
+def register_driver_name(fileExt, driverName):
+    driver_lookup[fileExt] = driverName
+
+
+def write_dataset(raster_dataset, filepath, fileFormat=None):
+    # If no fileFormat is provided, it is looked up from the filepath
+    driverName = fileFormat
+    if not driverName:
+        ext = get_uri_extension(filepath)
+        if ext in driver_lookup:
+            driverName = driver_lookup[ext]
+        else:
+            msg = 'No format provided, and none could be guessed from %s' % filepath
+            raise UnsupportedFormatException(msg)
+
+    output_driver = gdal.GetDriverByName(driverName)
+    outfile = output_driver.CreateCopy(filepath, raster_dataset, False)
+    logger.debug(str(outfile))
+    outfile = None
+
+
+def raster_to_numpy_array(raster_data, as_single_band=True,
+                          old_nodata=None, new_nodata=None):
+    """
+    Convert raster output to numpy array output
+
+    :param raster_data: Original raster output dataset
+    :param as_single_band: Output data as 2D array of its first band
+    (default is True). If False, returns full 3D array.
+    :param old_nodata: Explicitly identify existing NoData values
+    (default None). If None, attempts to get existing NoData values stored
+    in the raster band.
+    :param new_nodata: Replace NoData values in each band with new_nodata
+    (default None). If new_nodata is not None but old_nodata is None
+    and no existing NoData value is stored in the band, uses unchanged
+    default ReadAsArray() return values.
+    :return: Converted numpy array dataset
+    """
+    bands = as_single_band + (1 - as_single_band) * raster_data.RasterCount
+    nrow = raster_data.RasterYSize
+    ncol = raster_data.RasterXSize
+    dims = (bands, nrow, ncol)
+
+    out_data_array = np.full(dims, np.nan)
+
+    for i in range(bands):
+        srcband = raster_data.GetRasterBand(i + 1)
+        srcband_array = np.array(srcband.ReadAsArray().astype(np.float))
+        if old_nodata is None:
+            old_nodata = srcband.GetNoDataValue()
+        if new_nodata is not None and old_nodata is not None:
+            if np.isnan(old_nodata):
+                srcband_array[np.isnan(srcband_array)] = new_nodata
+            else:
+                srcband_array[srcband_array == old_nodata] = new_nodata
+            print('NoData: Replaced ' + str(old_nodata) +
+                  ' with ' + str(new_nodata))
+        out_data_array[i, :, :] = srcband_array
+
+    if as_single_band:
+        return out_data_array[0, :, :]
+    else:
+        return out_data_array
+
 
 def gdal_reproject(src, dst,
                    epsg=3857,
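# [Editor's note] Usage sketch for the helpers added above; illustrative
# only, not part of the patch. The file names are hypothetical, and 'HFA'
# (the GDAL Erdas Imagine driver) is an assumed example of registering an
# extra extension-to-driver mapping.
from osgeo import gdal

from gaia.geo.gdal_functions import (
    raster_to_numpy_array, register_driver_name, write_dataset)

src = gdal.Open('elevation.tif')

write_dataset(src, 'copy.tif')          # driver guessed from the extension
register_driver_name('img', 'HFA')
write_dataset(src, 'copy.img')          # uses the newly registered driver

# Convert band 1 to a 2D array, rewriting stored NoData values as NaN
arr = raster_to_numpy_array(src, as_single_band=True,
                            new_nodata=float('nan'))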
diff --git a/gaia/geo/geo_inputs.py b/gaia/geo/geo_inputs.py
deleted file mode 100644
index 0fdb135..0000000
--- a/gaia/geo/geo_inputs.py
+++ /dev/null
@@ -1,560 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-###############################################################################
-# Copyright Kitware Inc. and Epidemico Inc.
-#
-# Licensed under the Apache License, Version 2.0 ( the "License" );
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-###############################################################################
-import json
-import uuid
-import numpy as np
-
-import os
-import fiona
-import geopandas
-import gdal
-from sqlalchemy import create_engine, MetaData, Table, text
-from geoalchemy2 import Geometry
-try:
-    import osr
-except ImportError:
-    from osgeo import osr
-import gaia
-import gaia.formats as formats
-import gaia.types as types
-from gaia import GaiaException, sqlengines, get_abspath
-from gaia.inputs import GaiaIO, FileIO, UnsupportedFormatException
-from gaia.filters import filter_pandas, filter_postgis
-from gaia.geo.gdal_functions import gdal_reproject
-from gaia.geo.gdal_functions import rasterio_bbox, rasterio_footprint
-
-
-class VectorMixin(object):
-    """
-    Mixin class for common vector data IO methods
-    """
-    def transform_data(self, outformat=None, epsg=None):
-        """
-        Transform the IO data into the requested format and projection if
-        necessary.
-        :param outformat: Output format
-        :param epsg:
-        :return:
-        """
-        out_data = geopandas.GeoDataFrame.copy(self.data)
-        if epsg and str(self.get_epsg()) != epsg:
-            out_data[out_data.geometry.name] = \
-                self.data.geometry.to_crs(epsg=epsg)
-            out_data.crs = fiona.crs.from_epsg(epsg)
-        if outformat == formats.JSON and self.default_output in (
-                formats.PANDAS, formats.JSON):
-            out_json = out_data.to_json()
-            if out_data.crs:
-                gj = json.loads(out_json)
-                gj["crs"] = {
-                    "type": "name",
-                    "properties": {
-                        "name": out_data.crs["init"].upper()
-                    }
-                }
-                return json.dumps(gj)
-            else:
-                return out_json
-        elif outformat in [formats.PANDAS, None]:
-            return out_data
-        else:
-            raise GaiaException("Format {} not supported".format(outformat))
-
-
-class FeatureIO(GaiaIO, VectorMixin):
-    """
-    GeoJSON Feature Collection IO
-    """
-    #: Data type (vector or raster)
-    type = types.VECTOR
-
-    #: acceptable data format extensions
-    format = formats.VECTOR
-
-    #: Default output format
-    default_output = formats.PANDAS
-
-    def __init__(self, features=None, **kwargs):
-        """
-        Create a FeatureIO object
-
-        :param features: GeoJSON features/FeatureCollection
-        :param kwargs:
-        :return: FeatureIO object
-        """
-        super(FeatureIO, self).__init__(**kwargs)
-        self.features = features
-
-    def read(self, format=None, epsg=None):
-        """
-        Parse the FeatureIO.features and return as a GeoDataFrame or GeoJSON
-
-        :param format: Format of output (default is GeoDataFrame)
-        :param epsg: EPSG code of projection to reproject output to
-        :return: Dataset in requested format
-        """
-        if not format:
-            format = self.default_output
-        if self.data is None and self.features:
-            if type(self.features) == str:
-                self.features = json.loads(self.features)
-            features = self.features
-
-            if 'type' in features and features['type'] == 'FeatureCollection':
-                self.data = geopandas.GeoDataFrame.from_features(
-                    self.features['features'])
-            else:
-                self.data = geopandas.GeoDataFrame.from_features(features)
-            if not self.data.crs:
-                if hasattr(self, 'crs'):
-                    self.data.crs = self.crs
-                else:
-                    self.get_epsg()
-
-        return self.transform_data(outformat=format, epsg=epsg)
-
-    def delete(self):
-        """
-        Reset the IO data to None
-        """
-        self.data = None
-
-
-class VectorFileIO(FileIO, VectorMixin):
-    """
-    Read and write vector file data (such as GeoJSON)
-    Data will be read into a geopandas dataframe.
-    """
-
-    #: Data type (vector or raster)
-    type = types.VECTOR
-
-    #: acceptable data format extensions
-    format = formats.VECTOR
-
-    #: Default output format
-    default_output = formats.PANDAS
-
-    #: Optional arguments
-    optional_args = {
-        'filters': list
-    }
-
-    def read(self, format=None, epsg=None):
-        """
-        Read vector data from a file (JSON, Shapefile, etc)
-
-        :param format: Format to return data in (default is GeoDataFrame)
-        :param epsg: EPSG code to reproject data to
-        :return: Data in requested format (GeoDataFrame, GeoJSON)
-        """
-        if not format:
-            format = self.default_output
-        if self.ext not in formats.VECTOR:
-            raise UnsupportedFormatException(
-                "Only the following vector formats are supported: {}".format(
-                    ','.join(formats.VECTOR)
-                )
-            )
-        if self.data is None:
-            self.data = geopandas.read_file(self.uri)
-            if self.filters:
-                self.filter_data()
-        return self.transform_data(format, epsg)
-
-    def write(self, filename=None, as_type='json'):
-        """
-        Write data (assumed geopandas) to geojson or shapefile
-
-        :param filename: Base filename
-        :param as_type: shapefile or json
-        :return: location of file
-        """
-        if not filename:
-            filename = self.uri
-        self.create_output_dir(filename)
-        if as_type == 'json':
-            with open(filename, 'w') as outfile:
-                outfile.write(self.transform_data(outformat=formats.JSON))
-        elif as_type == 'shapefile':
-            self.data.to_file(filename)
-        else:
-            raise NotImplementedError('{} not a valid type'.format(as_type))
-        return self.uri
-
-    def filter_data(self):
-        """
-        Apply filters to the dataset
-        """
-        self.data = filter_pandas(self.data, self.filters)
-
-
-class RasterFileIO(FileIO):
-    """Read and write raster data (GeoTIFF)"""
-
-    #: Data type (vector or raster)
-    type = types.RASTER
-
-    #: acceptable data format extensions
-    format = formats.RASTER
-
-    #: Default output format
-    default_output = formats.RASTER
-
-    def read(self, as_numpy_array=False, as_single_band=True,
-             old_nodata=None, new_nodata=None, epsg=None):
-        """
-        Read data from a raster dataset
-
-        :param as_numpy_array: Output data as numpy
-        (default is False i.e. raster osgeo.gdal.Dataset)
-        :param as_single_band: Output data as 2D array of its first band
-        (default is True). If False, returns full 3D array.
-        :param old_nodata: Explicitly identify existing NoData values
-        (default None). If None, attempts to get existing NoData values stored
-        in the raster band.
-        :param new_nodata: Replace NoData values in each band with new_nodata
-        (default None). If new_nodata is not None but old_nodata is None
-        and no existing NoData value is stored in the band, uses unchanged
-        default ReadAsArray() return values.
-        :param epsg: EPSG code to reproject data to
-        :return: GDAL Dataset
-        """
-        if self.ext not in formats.RASTER:
-            raise UnsupportedFormatException(
-                "Only the following raster formats are supported: {}".format(
-                    ','.join(formats.RASTER)
-                )
-            )
-        self.basename = os.path.basename(self.uri)
-        if not self.data:
-            self.data = gdal.Open(self.uri)
-        out_data = self.data
-        if epsg and self.get_epsg() != epsg:
-            out_data = reproject(self.data, epsg)
-
-        if as_numpy_array:
-            return raster_to_numpy_array(out_data, as_single_band,
-                                         old_nodata, new_nodata)
-        else:
-            return out_data
-
-    def get_bbox(self):
-        """Return bounding box of the dataset"""
-        return rasterio_bbox(self.uri)
-
-    def get_footprint(self):
-        """Return footpring of the dataset"""
-        return rasterio_footprint(self.uri)
-
-
-class ProcessIO(GaiaIO):
-    """IO for nested GaiaProcess objects"""
-
-    #: Data type
-    type = types.PROCESS
-
-    #: Optional arguments
-    optional_args = {
-        'parent': uuid.UUID
-    }
-
-    def __init__(self, process=None, parent=None, **kwargs):
-        """
-        Create an IO object containing a GaiaProcess to run
-
-        :param process: GaiaProcess to run
-        :param parent: Parent process id if any
-        :param kwargs:
-        """
-        super(ProcessIO, self).__init__(**kwargs)
-        self.process = process
-        self.parent = parent
-        self.default_output = process.default_output
-
-    def get_epsg(self):
-        """
-        Return the EPSG code of the dataset
-        """
-        if hasattr(self.process, 'output') and self.process.output.data \
-                is not None:
-            return self.process.output.get_epsg()
-        else:
-            return self.process.inputs[0].get_epsg()
-
-    def read(self, epsg=None):
-        """
-        Return the process output dataset
-
-        :param epsg: EPSG code to reproject data to
-        :return: Output dataset
-        """
-        if self.data is None:
-            self.process.compute()
-            self.data = self.process.output.data
-        out_data = self.data
-        if epsg and self.get_epsg() != epsg:
-            out_data = reproject(self.data, epsg)
-        return out_data
-
-
-class PostgisIO(GaiaIO, VectorMixin):
-    """Read PostGIS data"""
-
-    #: Data type (vector or raster)
-    type = types.VECTOR
-
-    #: acceptable data format extensions
-    format = formats.VECTOR
-
-    #: Default output format
-    default_output = formats.JSON
-
-    hostname = None
-    dbname = None
-    user = None
-    password = None
-    table = None
-    columns = []
-    filters = None
-    geom_column = 'the_geom'
-    epsg = None
-    engine = None
-    meta = None
-    table_obj = None
-
-    def __init__(self, table, **kwargs):
-        """
-        Instantiate a PostgisIO object for querying a PostGIS table
-
-        :param table: PostgreSQL table name
-        :param kwargs: Optional connection arguments,
-        obtained from config if not present
-        """
-        super(PostgisIO, self).__init__(**kwargs)
-        self.table = table
-        self.host = kwargs.get('host') or gaia.config['gaia_postgis']['host']
-        self.dbname = kwargs.get(
-            'dbname') or gaia.config['gaia_postgis']['dbname']
-        self.user = kwargs.get('user') or gaia.config['gaia_postgis']['user']
-        self.password = kwargs.get(
-            'password') or gaia.config['gaia_postgis']['password']
-        self.engine = self.get_engine(self.get_connection_string())
-        self.get_table_info()
-        self.verify()
-
-    def get_engine(self, connection_string):
-        """
-        Create and return a SQLAlchemy engine object
-
-        :param connection_string: Database connection string
-        :return: SQLAlchemy Engine object
-        """
-        if connection_string not in sqlengines:
-            sqlengines[connection_string] = create_engine(
-                self.get_connection_string())
-        return sqlengines[connection_string]
-
-    def verify(self):
-        """
-        Make sure that all PostgisIO columns exist in the actual table
-        """
-        for col in self.columns:
-            if col not in self.table_obj.columns.keys():
-                raise Exception('{} column not found in {}'.format(
-                    col, self.table_obj))
-
-    def get_connection_string(self):
-        """
-        Get connection string based on host, dbname, username, password
-
-        :return: Postgres connection string for SQLAlchemy
-        """
-        auth = ''
-        if self.user:
-            auth = self.user
-            if self.password:
-                auth = auth + ':' + self.password
-        if auth:
-            auth += '@'
-        conn_string = 'postgresql://{auth}{host}/{dbname}'.format(
-            auth=auth, host=self.host, dbname=self.dbname)
-
-        return conn_string
-
-    def get_epsg(self):
-        """
-        Get the EPSG code of the data
-
-        :return: EPSG code
-        """
-        return self.epsg
-
-    def get_table_info(self):
-        """
-        Use SQLALchemy reflection to gather data on the table, including the
-        geometry column, geometry type, and EPSG code, and assign to the
-        PostgisIO object's attributes.
-        """
-        epsg = None
-        meta = MetaData()
-        table_obj = Table(self.table, meta,
-                          autoload=True, autoload_with=self.engine)
-        if not self.columns:
-            self.columns = table_obj.columns.keys()
-        geo_cols = [(col.name, col.type) for col in table_obj.columns
-                    if hasattr(col.type, 'srid')]
-        if geo_cols:
-            geo_col = geo_cols[0]
-            self.geom_column = geo_col[0]
-            geo_obj = geo_col[1]
-            if self.geom_column not in self.columns:
-                self.columns.append(self.geom_column)
-            if hasattr(geo_obj, 'srid'):
-                epsg = geo_obj.srid
-                if epsg == -1:
-                    epsg = 4326
-            if hasattr(geo_obj, 'geometry_type'):
-                self.geometry_type = geo_obj.geometry_type
-
-        self.epsg = epsg
-        self.table_obj = table_obj
-        self.meta = meta
-
-    def get_geometry_type(self):
-        """
-        Get the geometry type of the data
-
-        :return: Geometry type
-        """
-        return self.geometry_type
-
-    def get_query(self):
-        """
-        Formulate a query string and parameter list based on the
-        table name, columns, and filter
-
-        :return: Query string
-        """
-        columns = ','.join(['"{}"'.format(x) for x in self.columns])
-        query = 'SELECT {} FROM "{}"'.format(columns, self.table)
-        filter_params = []
-        if self.filters:
-            filter_sql, filter_params = filter_postgis(self.filters)
-            query += ' WHERE {}'.format(filter_sql)
-        query += ';'
-        return str(text(query)), filter_params
-
-    def read(self, format=None, epsg=None):
-        """
-        Load the table data into a GeoDataFrame, and return as a GeoDataFrame
-        or GeoJSON object
-
-        :param format: Output format (default is GeoDataFrame)
-        :param epsg: EPSG code to reproject the data to
-        :return: data in requested format
-        """
-        if self.data is None:
-            query, params = self.get_query()
-            self.data = df_from_postgis(self.engine, query, params,
-                                        self.geom_column, self.epsg)
-        return self.transform_data(outformat=format, epsg=epsg)
-
-
-def df_from_postgis(engine, query, params, geocolumn, epsg):
-    """
-    Run a PostGIS query and return results as a GeoDataFrame
-
-    :param engine: SQLAlchemy database connection engine
-    :param query: Query to run
-    :param params: Query parameter list
-    :param geocolumn: Geometry column of query
-    :param epsg: EPSG code of geometry output
-    :return: GeoDataFrame
-    """
-    data = geopandas.GeoDataFrame.from_postgis(
-        query,
-        engine,
-        geom_col=geocolumn,
-        crs={'init': 'epsg:{}'.format(epsg)},
-        params=params)
-    return data
-
-
-def reproject(dataset, epsg):
-    """
-    Reproject a dataset to the specified EPSG code
-
-    :param dataset: Dataset to reproject
-    :param epsg: EPSG code to reproject to
-    :return: Reprojected data
-    """
-    dataclass = dataset.__class__.__name__
-    # Run appropriate reprojection method
-    if dataclass == 'GeoDataFrame':
-        repro = geopandas.GeoDataFrame.copy(dataclass)
-        repro[repro.geometry.name] = repro.geometry.to_crs(epsg=epsg)
-        repro.crs = fiona.crs.from_epsg(epsg)
-    elif dataclass == 'Dataset':
-        repro = gdal_reproject(dataset, '', epsg=epsg)
-    return repro
-
-
-def raster_to_numpy_array(raster_data, as_single_band=True,
-                          old_nodata=None, new_nodata=None):
-    """
-    Convert raster output to numpy array output
-
-    :param raster_data: Original raster output dataset
-    :param as_single_band: Output data as 2D array of its first band
-    (default is True). If False, returns full 3D array.
-    :param old_nodata: Explicitly identify existing NoData values
-    (default None). If None, attempts to get existing NoData values stored
-    in the raster band.
-    :param new_nodata: Replace NoData values in each band with new_nodata
-    (default None). If new_nodata is not None but old_nodata is None
-    and no existing NoData value is stored in the band, uses unchanged
-    default ReadAsArray() return values.
-    :return: Converted numpy array dataset
-    """
-    bands = as_single_band + (1 - as_single_band) * raster_data.RasterCount
-    nrow = raster_data.RasterYSize
-    ncol = raster_data.RasterXSize
-    dims = (bands, nrow, ncol)
-
-    out_data_array = np.full(dims, np.nan)
-
-    for i in range(bands):
-        srcband = raster_data.GetRasterBand(i + 1)
-        srcband_array = np.array(srcband.ReadAsArray().astype(np.float))
-        if old_nodata is None:
-            old_nodata = srcband.GetNoDataValue()
-        if new_nodata is not None and old_nodata is not None:
-            if np.isnan(old_nodata):
-                srcband_array[np.isnan(srcband_array)] = new_nodata
-            else:
-                srcband_array[srcband_array == old_nodata] = new_nodata
-            print('NoData: Replaced ' + str(old_nodata) +
-                  ' with ' + str(new_nodata))
-        out_data_array[i, :, :] = srcband_array
-
-    if as_single_band:
-        return out_data_array[0, :, :]
-    else:
-        return out_data_array
diff --git a/gaia/geo/processes_raster.py b/gaia/geo/processes_raster.py
deleted file mode 100644
index cfa7e01..0000000
--- a/gaia/geo/processes_raster.py
+++ /dev/null
@@ -1,209 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-###############################################################################
-# Copyright Kitware Inc. and Epidemico Inc.
-#
-# Licensed under the Apache License, Version 2.0 ( the "License" );
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-###############################################################################
-import logging
-import gaia.formats as formats
-from gaia.gaia_process import GaiaProcess
-from gaia.geo.gdal_functions import gdal_calc, gdal_clip
-from gaia.geo.geo_inputs import RasterFileIO
-from gaia import types
-from osgeo import gdal
-
-logger = logging.getLogger('gaia.geo')
-
-
-class SubsetProcess(GaiaProcess):
-    """
-    Generates a raster dataset representing the portion of the input raster
-    dataset that is contained within a vector polygon.
-    """
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Image to subset',
-         'type': types.RASTER,
-         'max': 1
-         },
-        {'description': 'Subset area:',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Default output format for the process
-    default_output = formats.RASTER
-
-    def __init__(self, **kwargs):
-        """
-        Create a process to subset a raster by a vector polygon
-
-        :param clip_io: IO object containing vector polygon data
-        :param raster_io: IO object containing raster data
-        :param kwargs:
-        :return: SubsetProcess object
-        """
-        super(SubsetProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = RasterFileIO(name='result', uri=self.get_outpath())
-
-    def compute(self):
-        """
-        Runs the subset computation, creating a raster dataset as output.
-        """
-        raster, clip = self.inputs[0], self.inputs[1]
-        raster_img = raster.read()
-        clip_df = clip.read(epsg=raster.get_epsg())
-        # Merge all features in vector input
-        raster_output = self.output.uri
-        self.output.create_output_dir(raster_output)
-        clip_json = clip_df.geometry.unary_union.__geo_interface__
-        self.output.data = gdal_clip(raster_img, raster_output, clip_json)
-
-
-class RasterMathProcess(GaiaProcess):
-    """
-    Performs raster math/algebra based on supplied arguments.
-    Inputs should consist of at least one raster IO object.
-    Required arg is 'calc', an equation for the input rasters.
-    Example: "A + B / (C * 2.4)". The letters in the equation
-    should correspond to the names of the inputs.
-    """
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Image',
-         'type': types.RASTER,
-         'max': None
-         }
-    ]
-    #: Required arguments for the process
-    required_args = [{
-        'name': 'calc',
-        'title': 'Calculation',
-        'description': 'Equation to apply to rasters (ex: "(A+B)/2"',
-        'type': str
-    }]
-    #: Default output format for the process
-    default_output = formats.RASTER
-
-    #: Default input raster bands to process
-    bands = None
-    #: Default NODATA value for raster input
-    nodata = None
-    #: Use all bands in raster input (default: False)
-    all_bands = False
-    #: Default data type for raster (UInt32, Float, etc)
-    output_type = None
-    #: Image output format (default='GTiff')
-    output_format = 'GTiff'
-
-    def __init__(self, calc=None, **kwargs):
-        """
-        Initialize a RasterMathProcess object.
-
-        :param calc: A text representation of the calculation to make.
-        :param kwargs: Other keyword arguments
-        """
-        self.calc = calc
-        super(RasterMathProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = RasterFileIO(name='result', uri=self.get_outpath())
-
-    def compute(self):
-        """
-        Run the RasterMath process, generating a raster output dataset.
-        """
-        first = self.inputs[0]
-        epsg = first.get_epsg()
-        rasters = [x.read(epsg=epsg) for x in self.inputs]
-        bands = self.bands
-        nodata = self.nodata
-        all_bands = self.all_bands
-        otype = self.output_type
-        self.output.create_output_dir(self.output.uri)
-        self.output.data = gdal_calc(self.calc,
-                                     self.output.uri,
-                                     rasters,
-                                     bands=bands,
-                                     nodata=nodata,
-                                     allBands=all_bands,
-                                     output_type=otype,
-                                     format=self.output_format)
-
-
-class MergeProcess(GaiaProcess):
-    """
-    Merge multiple raster datasets into one multi-band raster dataset.
-    """
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Images to merge',
-         'type': types.RASTER,
-         'max': None
-         }
-    ]
-
-    #: Default output format for the process
-    default_output = formats.RASTER
-
-    def __init__(self, **kwargs):
-        """
-        Create a process to merge rasters.
-
-        :param inputs: Images to merge.
-        :param kwargs: Other keyword arguments.
-        :return: MergeProcess object
-        """
-        super(MergeProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = RasterFileIO(name='result', uri=self.get_outpath())
-
-    def compute(self):
-        """
-        Runs the multi-band merge, creating a raster dataset as output.
-        """
-
-        # Read files.
-        input_images = [img.read() for img in self.inputs]
-
-        # Get band counts by image.
-        input_band_counts = [img.RasterCount for img in input_images]
-
-        # First raster for formatting.
-        fmt = input_images[0]
-
-        # Generate an output image.
-        driver_mem = gdal.GetDriverByName('MEM')
-        output_image = driver_mem.Create('',
-                                         fmt.RasterXSize,
-                                         fmt.RasterYSize,
-                                         sum(input_band_counts),
-                                         fmt.GetRasterBand(1).DataType
-                                         )
-
-        # Merge bands into output image.
-        current_band = 0  # For iterating through all bands.
-        for img in range(0, len(input_images)):
-            for band in range(0, input_band_counts[img]):
-                current_band = current_band + 1
-                band_to_write = output_image.GetRasterBand(current_band)
-                band_to_write.WriteArray(input_images[img]
-                                         .GetRasterBand(band + 1)
-                                         .ReadAsArray()
-                                         )
-
-        # Merged raster.
-        self.output.data = output_image
diff --git a/gaia/geo/processes_vector.py b/gaia/geo/processes_vector.py
deleted file mode 100644
index effe965..0000000
--- a/gaia/geo/processes_vector.py
+++ /dev/null
@@ -1,1237 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-###############################################################################
-# Copyright Kitware Inc. and Epidemico Inc.
-#
-# Licensed under the Apache License, Version 2.0 ( the "License" );
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-###############################################################################
-import json
-import logging
-import re
-import fiona
-import numpy as np
-import pandas as pd
-try:
-    import osr
-except ImportError:
-    from osgeo import osr
-from geopandas import GeoSeries
-from geopandas import GeoDataFrame
-import gaia.formats as formats
-import gaia.types as types
-from gaia import GaiaException
-from gaia.geo import GaiaProcess
-from gaia.geo.gdal_functions import gdal_zonalstats
-from gaia.geo.geo_inputs import VectorFileIO, df_from_postgis
-
-logger = logging.getLogger('gaia.geo')
-
-
-class BufferProcess(GaiaProcess):
-    """
-    Generates a buffer polygon around the geometries of the input data.
-    The size of the buffer is determined by the 'buffer_size' args key
-    and the unit of measure should be meters. If inputs are not in a
-    metric projection they will be reprojected to EPSG:3857.
-    """
-
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Feature dataset',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Required arguments, data types as dict
-    required_args = [
-        {
-            'name': 'buffer_size',
-            'title': 'Buffer Size',
-            'description': 'Size of the buffer in meters',
-            'type': float
-        }
-    ]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, inputs=None, buffer_size=None, **kwargs):
-        self.buffer_size = buffer_size
-        super(BufferProcess, self).__init__(inputs, **kwargs)
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculate the buffer using pandas GeoDataFrames
-
-        :return: Buffer as a pandas GeoDataFrame
-        """
-        featureio = self.inputs[0]
-        original_projection = featureio.get_epsg()
-        epsg = original_projection
-        srs = osr.SpatialReference()
-        srs.ImportFromEPSG(int(original_projection))
-        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
-            epsg = 3857
-        else:
-            original_projection = None
-        feature_df = featureio.read(epsg=epsg)
-        buffer = GeoSeries(feature_df.buffer(self.buffer_size).unary_union)
-        buffer_df = GeoDataFrame(geometry=buffer)
-        buffer_df.crs = feature_df.crs
-        if original_projection:
-            buffer_df[buffer_df.geometry.name] = buffer_df.to_crs(
-                epsg=original_projection)
-            buffer_df.crs = fiona.crs.from_epsg(original_projection)
-        return buffer_df
-
-    def calc_postgis(self):
-        """
-        Calculate the buffer using PostGIS
-
-        :return: Buffer as a pandas GeoDataFrame
-        """
-        pg_io = self.inputs[0]
-        original_projection = pg_io.epsg
-        io_query, params = pg_io.get_query()
-        srs = osr.SpatialReference()
-        srs.ImportFromEPSG(int(original_projection))
-
-        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
-            geom_query = 'ST_Transform({}, {})'.format(
-                pg_io.geom_column, 3857)
-        else:
-            original_projection = None
-        buffer_query = 'ST_Union(ST_Buffer({}, %s))'.format(geom_query)
-        if original_projection:
-            buffer_query = 'ST_Transform({}, {})'.format(buffer_query,
-                                                         original_projection)
-
-        query = 'SELECT {buffer} as {geocol} ' \
-                'FROM ({query}) as foo'.format(buffer=buffer_query,
-                                               geocol=pg_io.geom_column,
-                                               query=io_query.rstrip(';'))
-        params.insert(0, self.buffer_size)
-        logger.debug(query)
-        return df_from_postgis(pg_io.engine, query, params,
-                               pg_io.geom_column, pg_io.epsg)
-
-    def compute(self):
-        """
-        Run the buffer process.
-        """
-        if self.inputs[0].__class__.__name__ == 'PostgisIO':
-            data = self.calc_postgis()
-        else:
-            data = self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-
-
-class WithinProcess(GaiaProcess):
-    """
-    Similar to SubsetProcess but for vectors: calculates the features within
-    a vector dataset that are within (or whose centroids are within) the
-    polygons of a second vector dataset.
-    """
-
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Feature dataset',
-         'type': types.VECTOR,
-         'max': 1
-         },
-        {'description': 'Within dataset',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, **kwargs):
-        super(WithinProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculate the within process using pandas GeoDataFrames
-
-        :return: within result as a GeoDataFrame
-        """
-        first, second = self.inputs[0], self.inputs[1]
-        first_df = first.read()
-        second_df = second.read(epsg=first.get_epsg())
-        first_within = first_df[first_df.geometry.within(
-            second_df.geometry.unary_union)]
-        return first_within
-
-    def calc_postgis(self):
-        """
-        Calculate the within process using PostGIS
-
-        :return: within result as a GeoDataFrame
-        """
-        first = self.inputs[0]
-        within_queries = []
-        within_params = []
-        geom0 = first.geom_column
-        epsg = first.epsg
-        geom1 = self.inputs[1].geom_column
-        for pg_io in self.inputs:
-            io_query, params = pg_io.get_query()
-            within_queries.append(io_query.rstrip(';'))
-            within_params.extend(params)
-        joinstr = ' AND ' if 'WHERE' in within_queries[0].upper() else ' WHERE '
-        query = '{query0} {join} ST_Within(ST_Transform({geom0},{epsg}), ' \
-                '(SELECT ST_Union(ST_TRANSFORM({geom1},{epsg})) ' \
-                'from ({query1}) as q2))'\
-            .format(query0=within_queries[0], join=joinstr, geom0=geom0,
-                    geom1=geom1, epsg=epsg, query1=within_queries[1])
-        return df_from_postgis(first.engine, query, params, geom0, epsg)
-
-    def compute(self):
-        """
-        Run the Within process
-        """
-        if len(self.inputs) != 2:
-            raise GaiaException('WithinProcess requires 2 inputs')
-        input_classes = list(self.get_input_classes())
-        use_postgis = (len(input_classes) == 1 and
-                       input_classes[0] == 'PostgisIO')
-        data = self.calc_postgis() if use_postgis else self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-
-
-class IntersectsProcess(GaiaProcess):
-    """
-    Calculates the features within the first vector dataset that intersect
-    the features of the second vector dataset.
-    """
-
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Feature dataset',
-         'type': types.VECTOR,
-         'max': 1
-         },
-        {'description': 'Intersect dataset',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, **kwargs):
-        super(IntersectsProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculate the intersection using pandas GeoDataFrame
-
-        :return: intersection as a DataFrame object
-        """
-        first, second = self.inputs[0], self.inputs[1]
-        first_df = first.read()
-        second_df = second.read(epsg=first.get_epsg())
-        first_intersects = first_df[first_df.geometry.intersects(
-            second_df.geometry.unary_union)]
-        return first_intersects
-
-    def calc_postgis(self):
-        """
-        Calculate the intersection using PostGIS
-
-        :return: intersection as a GeoDataFrame object
-        """
-        int_queries = []
-        int_params = []
-        first = self.inputs[0]
-        geom0 = first.geom_column
-        epsg = first.epsg
-        geom1 = self.inputs[1].geom_column
-        for pg_io in self.inputs:
-            io_query, params = pg_io.get_query()
-            int_queries.append(io_query.rstrip(';'))
-            int_params.extend(params)
-        joinstr = ' AND ' if 'WHERE' in int_queries[0].upper() else ' WHERE '
-        query = '{query0} {join} (SELECT ST_Intersects(ST_Transform(' \
-                '{table}.{geom0},{epsg}), ST_Union(ST_Transform(' \
-                'q2.{geom1},{epsg}))) from ({query1}) as q2)'\
-            .format(query0=int_queries[0], join=joinstr, geom0=geom0,
-                    geom1=geom1, epsg=epsg, query1=int_queries[1],
-                    table=first.table)
-        return df_from_postgis(first.engine,
-                               query, int_params, geom0, epsg)
-
-    def compute(self):
-        """
-        Run the intersection process
-        """
-        input_classes = list(self.get_input_classes())
-        use_postgis = (len(input_classes) == 1 and
-                       input_classes[0] == 'PostgisIO')
-        data = self.calc_postgis() if use_postgis else self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-        logger.debug(self.output)
-
-
-class DisjointProcess(GaiaProcess):
-    """
-    Calculates which features of the first vector dataset do not
-    intersect the features of the second dataset.
-    """
-
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Feature dataset',
-         'type': types.VECTOR,
-         'max': 1
-         },
-        {'description': 'Disjoint dataset',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, **kwargs):
-        super(DisjointProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculat the disjoin between inputs using pandas GeoDataFrames
-
-        :return: disjoin as a GeoDataFrame object
-        """
-        first, second = self.inputs[0], self.inputs[1]
-        first_df = first.read()
-        second_df = second.read(epsg=first.get_epsg())
-        first_difference = first_df[first_df.geometry.disjoint(
-            second_df.geometry.unary_union)]
-        return first_difference
-
-    def calc_postgis(self):
-        """
-        Calculat the disjoin between inputs using PostGIS
-
-        :return: disjoin as a GeoDataFrame object
-        """
-        diff_queries = []
-        diff_params = []
-        first = self.inputs[0]
-        geom0, epsg = first.geom_column, first.epsg
-        geom1 = self.inputs[1].geom_column
-        for pg_io in self.inputs:
-            io_query, params = pg_io.get_query()
-            diff_queries.append(io_query.rstrip(';'))
-            diff_params.extend(params)
-        joinstr = ' AND ' if 'WHERE' in diff_queries[0].upper() else ' WHERE '
-        query = '{query0} {join} (SELECT ST_Disjoint(ST_Transform(' \
-                '{table}.{geom0},{epsg}), ST_Union(ST_Transform(' \
-                'q2.{geom1},{epsg}))) from ({query1}) as q2)'\
-            .format(query0=diff_queries[0], join=joinstr, geom0=geom0,
-                    geom1=geom1, epsg=epsg, query1=diff_queries[1],
-                    table=first.table)
-        return df_from_postgis(first.engine, query, diff_params, geom0, epsg)
-
-    def compute(self):
-        """
-        Run the disjoint process
-        """
-        input_classes = list(self.get_input_classes())
-        use_postgis = (len(input_classes) == 1 and
-                       input_classes[0] == 'PostgisIO')
-        data = self.calc_postgis() if use_postgis else self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-        logger.debug(self.output)
-
-
-class UnionProcess(GaiaProcess):
-    """
-    Combines two vector datasets into one.
-    They datasets should have the same columns.
-    """
-
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'First dataset',
-         'type': types.VECTOR,
-         'max': 1
-         },
-        {'description': 'Second dataset',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, **kwargs):
-        super(UnionProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculate the union using pandas GeoDataFrames
-
-        :return: union result as a GeoDataFrame
-        """
-        first, second = self.inputs[0], self.inputs[1]
-        first_df = first.read()
-        second_df = second.read(epsg=first.get_epsg())
-        if ''.join(first_df.columns) != ''.join(second_df.columns):
-            raise GaiaException('Inputs must have the same columns')
-        uniondf = GeoDataFrame(pd.concat([first_df, second_df]))
-        return uniondf
-
-    def calc_postgis(self):
-        """
-        Calculate the union using PostGIS
-
-        :return: union result as a GeoDataFrame
-        """
-        union_queries = []
-        union_params = []
-        first = self.inputs[0]
-        second = self.inputs[1]
-        geom0, epsg = first.geom_column, first.epsg
-        geom1, epsg1 = second.geom_column, second.epsg
-        if ''.join(first.columns) != ''.join(second.columns):
-            raise GaiaException('Inputs must have the same columns')
-        for pg_io in self.inputs:
-            io_query, params = pg_io.get_query()
-            union_queries.append(io_query.rstrip(';'))
-            union_params.extend(params)
-
-        if epsg1 != epsg:
-            geom1_query = 'ST_Transform({},{})'.format(geom1, epsg)
-            union_queries[1] = union_queries[1].replace(
-                '"{}"'.format(geom1), geom1_query)
-        query = '({query0}) UNION ({query1})'\
-            .format(query0=union_queries[0], query1=union_queries[1])
-        return df_from_postgis(first.engine,
-                               query, union_params, geom0, epsg)
-
-    def compute(self):
-        """
-        Run the union process.
-        """
-        input_classes = list(self.get_input_classes())
-        use_postgis = (len(input_classes) == 1 and
-                       input_classes[0] == 'PostgisIO')
-        data = self.calc_postgis() if use_postgis else self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-        logger.debug(self.output)
-
-
-class CentroidProcess(GaiaProcess):
-    """
-    Calculates the centroid point of a vector dataset.
-    """
-
-    #: List of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Line/Polygon dataset',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    optional_args = [{
-        'name': 'combined',
-        'title': 'Combined',
-        'description': 'Get centroid of features combined (default False)',
-        'type': bool,
-
-    }]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, combined=False, **kwargs):
-        super(CentroidProcess, self).__init__(**kwargs)
-        self.combined = combined
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculate the centroid using pandas GeoDataFrames
-
-        :return: centroid as a GeoDataFrame
-        """
-        df_in = self.inputs[0].read()
-        df = GeoDataFrame(df_in.copy(), geometry=df_in.geometry.name)
-        if self.combined:
-            gs = GeoSeries(df.geometry.unary_union.centroid,
-                           name=df_in.geometry.name)
-            return GeoDataFrame(gs)
-        else:
-            df[df.geometry.name] = df.geometry.centroid
-            return df
-
-    def calc_postgis(self):
-        """
-        Calculate the centroid using PostGIS
-
-        :return: centroid as a GeoDataFrame
-        """
-        pg_io = self.inputs[0]
-        io_query, params = pg_io.get_query()
-        geom0, epsg = pg_io.geom_column, pg_io.epsg
-        if self.combined:
-            query = 'SELECT ST_Centroid(ST_Union({geom})) as {geom}' \
-                    ' from ({query}) as foo'.format(geom=geom0,
-                                                    query=io_query.rstrip(';'))
-        else:
-            query = re.sub('"{}"'.format(geom0),
-                           'ST_Centroid("{geom}") as {geom}'.format(
-                               geom=geom0), io_query, 1)
-        return df_from_postgis(pg_io.engine, query, params, geom0, epsg)
-
-    def compute(self):
-        """
-        Run the centroid process
-        """
-        use_postgis = self.inputs[0].__class__.__name__ == 'PostgisIO'
-        data = self.calc_postgis() if use_postgis else self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-        logger.debug(self.output)
-
-
-class DistanceProcess(GaiaProcess):
-    """
-    Calculates the minimum distance from each feature of the first dataset
-    to the nearest feature of the second dataset.
-    """
-
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'From dataset',
-         'type': types.VECTOR,
-         'max': 1
-         },
-        {'description': 'To dataset',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, **kwargs):
-        super(DistanceProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculate the minimum distance between features using pandas
-        GeoDataFrames.
-
-        :return: Minimum distance results as a GeoDataFrame
-        """
-        first = self.inputs[0]
-        original_projection = first.get_epsg()
-        epsg = original_projection
-        srs = osr.SpatialReference()
-        srs.ImportFromEPSG(int(original_projection))
-        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
-            epsg = 3857
-        else:
-            original_projection = None
-        first_df = first.read(epsg=epsg)
-        first_gs = first_df.geometry
-        first_length = len(first_gs)
-        second_df = self.inputs[1].read(epsg=epsg)
-        second_gs = second_df.geometry
-        min_dist = np.empty(first_length)
-        for i, first_features in enumerate(first_gs):
-            min_dist[i] = np.min([first_features.distance(second_features)
-                                  for second_features in second_gs])
-
-        distance_df = GeoDataFrame.copy(first_df)
-        distance_df['distance'] = min_dist
-        distance_df.sort_values('distance', inplace=True)
-        if original_projection:
-            distance_df[distance_df.geometry.name] = \
-                distance_df.geometry.to_crs(epsg=original_projection)
-        return distance_df
-
-    def calc_postgis(self):
-        """
-        Calculate the minimum distance between features using PostGIS
-        K-Nearest Neighbor (KNN) query
-
-        :return: Minimum distance results as a GeoDataFrame
-        """
-        diff_queries = []
-        diff_params = []
-        first = self.inputs[0]
-        geom0, epsg = first.geom_column, first.epsg
-        srs = osr.SpatialReference()
-        srs.ImportFromEPSG(int(epsg))
-        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
-            epsg = 3857
-
-        geom1 = self.inputs[1].geom_column
-        for pg_io in self.inputs:
-            io_query, params = pg_io.get_query()
-            diff_queries.append(io_query.rstrip(';'))
-            diff_params.insert(0, params)
-
-        diff_params = [item for x in diff_params for item in x]
-        dist1 = """, (SELECT ST_Distance(
-                ST_Transform({table0}.{geom0},{epsg}),
-                ST_Transform(query2.{geom1},{epsg}))
-                as distance
-                """.format(table0=self.inputs[0].table,
-                           geom0=geom0,
-                           geom1=geom1,
-                           epsg=epsg)
-
-        dist2 = """
-                ORDER BY {table0}.{geom0} <#> query2.{geom1} LIMIT 1) FROM
-                """.format(table0=self.inputs[0].table,
-                           geom0=geom0,
-                           geom1=geom1,
-                           epsg=epsg)
-
-        dist3 = ' ORDER BY distance ASC'
-        query = re.sub('FROM', dist1 + ' FROM (' + diff_queries[1] +
-                       ') as query2 ' + dist2, diff_queries[0]) + dist3
-        return df_from_postgis(first.engine, query, diff_params, geom0, epsg)
-
-    def compute(self):
-        """
-        Run the distance process
-        """
-        input_classes = list(self.get_input_classes())
-        use_postgis = (len(input_classes) == 1 and
-                       input_classes[0] == 'PostgisIO')
-        data = self.calc_postgis() if use_postgis else self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-
-
-class NearProcess(GaiaProcess):
-    """
-    Takes two inputs, the second assumed to contain a single feature,
-    the first a vector dataset. Requires a distance argument, and the unit of
-    measure should be meters. If inputs are not in a
-    metric projection they will be reprojected to EPSG:3857.
-    Returns the features in the first input within a specified distance
-    of the point in the second input.
-    """
-
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Features',
-         'type': types.VECTOR,
-         'max': 1
-         },
-        {'description': 'Point',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Required arguments, data types as dict
-    required_args = [{
-        'name': 'distance',
-        'title': 'Distance',
-        'description': 'Distance to search for features, in meters',
-        'type': float
-    }]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, distance=None, **kwargs):
-        super(NearProcess, self).__init__(**kwargs)
-        self.distance = distance
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculates the features within the specified distance using pandas
-
-        :return: results as a GeoDataFrame
-        """
-        features = self.inputs[0]
-        original_projection = self.inputs[0].get_epsg()
-        epsg = original_projection
-        srs = osr.SpatialReference()
-        srs.ImportFromEPSG(int(original_projection))
-        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
-            epsg = 3857
-        else:
-            original_projection = None
-        features_df = features.read(epsg=epsg)
-        features_gs = features_df.geometry
-        point_df = self.inputs[1].read(epsg=epsg)[:1]
-        point_gs = point_df.geometry
-        features_length = len(features_gs)
-        min_dist = np.empty(features_length)
-        for i, feature in enumerate(features_gs):
-            min_dist[i] = np.min([feature.distance(point_gs[0])])
-
-        nearby_df = GeoDataFrame.copy(features_df)
-        nearby_df['distance'] = min_dist
-        distance_max = self.distance
-        nearby_df = nearby_df[(nearby_df['distance'] <= distance_max)]\
-            .sort_values('distance')
-        if original_projection:
-            nearby_df[nearby_df.geometry.name] = \
-                nearby_df.geometry.to_crs(epsg=original_projection)
-        return nearby_df
-
-    def calc_postgis(self):
-        """
-        Calculates the features within the specified distance using PostGIS
-        via DWithin plus K-Nearest Neighbor (KNN) query
-
-        :return: results as a GeoDataFrame
-        """
-        featureio = self.inputs[0]
-        pointio = self.inputs[1]
-        feature_geom, epsg = featureio.geom_column, featureio.epsg
-        point_json = json.loads(pointio.read(
-            format=formats.JSON))['features'][0]
-        point_epsg = pointio.get_epsg()
-
-        srs = osr.SpatialReference()
-        srs.ImportFromEPSG(int(epsg))
-        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
-            epsg = 3857
-
-        io_query, params = featureio.get_query()
-
-        point_geom = 'ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON(\'' \
-                     '{geojson}\'),{point_epsg}), {epsg})'.\
-            format(geojson=json.dumps(point_json['geometry']),
-                   point_epsg=point_epsg, epsg=epsg)
-
-        dist1 = """, (SELECT ST_Distance(
-                ST_Transform({table0}.{geom0},{epsg}),
-                ST_Transform(point, {epsg}))
-                FROM {point_geom} as point
-                ORDER BY {table0}.{geom0} <#> point LIMIT 1) as distance FROM
-                """.format(table0=featureio.table,
-                           geom0=feature_geom,
-                           point_geom=point_geom,
-                           epsg=epsg)
-
-        dist2 = """
-                WHERE ST_DWithin({point_geom},
-                ST_Transform({table0}.{geom0},{epsg}), {distance})
-                """.format(table0=featureio.table,
-                           geom0=feature_geom,
-                           point_geom=point_geom,
-                           epsg=epsg,
-                           distance=self.distance)
-
-        dist3 = ' ORDER BY distance ASC'
-        query = re.sub('FROM', dist1, io_query).rstrip(';')
-        if 'WHERE' in query:
-            query = re.sub('WHERE', dist2 + ' AND ', query)
-        else:
-            query += dist2
-        query += dist3
-        logger.debug(query)
-        return df_from_postgis(featureio.engine,
-                               query, params, feature_geom, epsg)
-
-    def compute(self):
-        """
-        Run the process
-        """
-        if self.inputs[0].__class__.__name__ == 'PostgisIO':
-            data = self.calc_postgis()
-        else:
-            data = self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-
-
-class AreaProcess(GaiaProcess):
-    """
-    Calculate the area of each polygon feature in a dataset.
-    If the dataset projection is not in metric units, it will
-    be temporarily reprojected to EPSG:3857 to calculate the area.
-    """
-
-    #: Tuple of required inputs; name, type , max # of each; None = no max
-    required_inputs = [
-        {'description': 'Polygon dataset',
-         'type': types.VECTOR,
-         'max': 1
-         }
-    ]
-
-    #: Default output format
-    default_output = formats.JSON
-
-    def __init__(self, **kwargs):
-        super(AreaProcess, self).__init__(**kwargs)
-        if not self.output:
-            self.output = VectorFileIO(name='result',
-                                       uri=self.get_outpath())
-
-    def calc_pandas(self):
-        """
-        Calculate the areas using pandas
-
-        :return: result as a GeoDataFrame
-        """
-        featureio = self.inputs[0]
-        original_projection = featureio.get_epsg()
-        epsg = original_projection
-        srs = osr.SpatialReference()
-        srs.ImportFromEPSG(int(original_projection))
-        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
-            epsg = 3857
-        else:
-            original_projection = None
-        feature_df = GeoDataFrame.copy(featureio.read(epsg=epsg))
-        feature_df['area'] = feature_df.geometry.area
-        if original_projection:
-            feature_df[feature_df.geometry.name] = feature_df.geometry.to_crs(
-                epsg=original_projection)
-            feature_df.crs = fiona.crs.from_epsg(original_projection)
-        return feature_df
-
-    def calc_postgis(self):
-        """
-        Calculate the areas using PostGIS
-
-        :return: result as a GeoDataFrame
-        """
-        pg_io = self.inputs[0]
-        geom0, epsg = pg_io.geom_column, pg_io.epsg
-        srs = osr.SpatialReference()
-        srs.ImportFromEPSG(epsg)
-
-        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
-            geom_query = 'ST_Transform({}, {})'.format(geom0, 3857)
-        geom_query = ', ST_Area({}) as area'.format(geom_query)
-        query, params = pg_io.get_query()
-        query = query.replace('FROM', '{} FROM'.format(geom_query))
-        logger.debug(query)
-        return df_from_postgis(pg_io.engine, query, params, geom0, epsg)
-
-    def compute(self):
-        """
-        Run the area process
-        """
-        if self.inputs[0].__class__.__name__ == 'PostgisIO':
-            data = self.calc_postgis()
-        else:
-            data = self.calc_pandas()
-        self.output.data = data
-        self.output.write()
-
-
-class LengthProcess(GaiaProcess):
-    """
-    Calculate the length of each feature in a dataset.
-    If the dataset projection is not in metric units, it will
-    be temporarily reprojected to EPSG:3857 to calculate the area.
- """ - - #: Tuple of required inputs; name, type , max # of each; None = no max - required_inputs = [ - {'description': 'Line/Polygon dataset', - 'type': types.VECTOR, - 'max': 1 - } - ] - - #: Default output format - default_output = formats.JSON - - def __init__(self, **kwargs): - super(LengthProcess, self).__init__(**kwargs) - if not self.output: - self.output = VectorFileIO(name='result', - uri=self.get_outpath()) - - def calc_pandas(self): - """ - Calculate lengths using pandas - - :return: Result as a GeoDataFrame - """ - featureio = self.inputs[0] - original_projection = featureio.get_epsg() - epsg = original_projection - srs = osr.SpatialReference() - srs.ImportFromEPSG(int(original_projection)) - if not srs.GetAttrValue('UNIT').lower().startswith('met'): - epsg = 3857 - else: - original_projection = None - feature_df = GeoDataFrame.copy(featureio.read(epsg=epsg)) - feature_df['length'] = feature_df.geometry.length - if original_projection: - feature_df[feature_df.geometry.name] = feature_df.geometry.to_crs( - epsg=original_projection) - feature_df.crs = fiona.crs.from_epsg(original_projection) - return feature_df - - def calc_postgis(self): - """ - Calculate lengths using PostGIS - - :return: Result as a GeoDataFrame - """ - featureio = self.inputs[0] - geom0, epsg = featureio.geom_column, featureio.epsg - srs = osr.SpatialReference() - srs.ImportFromEPSG(epsg) - geom_query = geom0 - geometry_type = featureio.geometry_type - length_func = 'ST_Perimeter' if 'POLYGON' in geometry_type.upper() \ - else 'ST_Length' - if not srs.GetAttrValue('UNIT').lower().startswith('met'): - geom_query = 'ST_Transform({}, {})'.format( - geom_query, 3857) - geom_query = ', {}({}) as length'.format(length_func, geom_query) - query, params = featureio.get_query() - query = query.replace('FROM', '{} FROM'.format(geom_query)) - logger.debug(query) - return df_from_postgis(featureio.engine, query, params, geom0, epsg) - - def compute(self): - """ - Run the length process - """ - if self.inputs[0].__class__.__name__ == 'PostgisIO': - data = self.calc_postgis() - else: - data = self.calc_pandas() - self.output.data = data - self.output.write() - - -class CrossesProcess(GaiaProcess): - """ - Calculates the features within the first vector dataset that cross - the combined features of the second vector dataset. 
- """ - - #: Tuple of required inputs; name, type , max # of each; None = no max - required_inputs = [ - {'description': 'Feature dataset', - 'type': types.VECTOR, - 'max': 1 - }, - {'description': 'Crosses dataset', - 'type': types.VECTOR, - 'max': 1 - } - ] - - #: Default output format - default_output = formats.JSON - - def __init__(self, **kwargs): - super(CrossesProcess, self).__init__(**kwargs) - if not self.output: - self.output = VectorFileIO(name='result', - uri=self.get_outpath()) - - def calc_pandas(self): - """ - Calculate the process using pandas - - :return: result as a GeoDataFrame - """ - first, second = self.inputs[0], self.inputs[1] - first_df = first.read() - second_df = second.read(epsg=first.get_epsg()) - first_intersects = first_df[first_df.geometry.crosses( - second_df.geometry.unary_union)] - return first_intersects - - def calc_postgis(self): - """ - Calculate the process using PostGIS - - :return: result as a GeoDataFrame - """ - cross_queries = [] - cross_params = [] - first = self.inputs[0] - geom0, epsg = first.geom_column, first.epsg - geom1 = self.inputs[1].geom_column - for pg_io in self.inputs: - io_query, params = pg_io.get_query() - cross_queries.append(io_query.rstrip(';')) - cross_params.extend(params) - joinstr = ' AND ' if 'WHERE' in cross_queries[0].upper() else ' WHERE ' - query = '{query0} {join} (SELECT ST_Crosses(ST_Transform(' \ - '{table}.{geom0},{epsg}), ST_Union(ST_Transform(' \ - 'q2.{geom1},{epsg}))) from ({query1}) as q2)'\ - .format(query0=cross_queries[0], join=joinstr, geom0=geom0, - geom1=geom1, epsg=epsg, query1=cross_queries[1], - table=first.table) - return df_from_postgis(first.engine, query, cross_params, geom0, epsg) - - def compute(self): - """ - Run the crosses process - """ - input_classes = list(self.get_input_classes()) - use_postgis = (len(input_classes) == 1 and - input_classes[0] == 'PostgisIO') - data = self.calc_postgis() if use_postgis else self.calc_pandas() - self.output.data = data - self.output.write() - logger.debug(self.output) - - -class TouchesProcess(GaiaProcess): - """ - Calculates the features within the first vector dataset that touch - edges but do not overlap in any way the features of the 2nd vector dataset. 
- """ - - #: Tuple of required inputs; name, type , max # of each; None = no max - required_inputs = [ - {'description': 'Feature dataset', - 'type': types.VECTOR, - 'max': 1 - }, - {'description': 'Touches dataset', - 'type': types.VECTOR, - 'max': 1 - } - ] - - #: Default output format - default_output = formats.JSON - - def __init__(self, **kwargs): - super(TouchesProcess, self).__init__(**kwargs) - if not self.output: - self.output = VectorFileIO(name='result', - uri=self.get_outpath()) - - def calc_pandas(self): - """ - Calculates which features touch using pandas - - :return: result as a GeoDataFrame - """ - first, second = self.inputs[0], self.inputs[1] - first_df = first.read() - second_df = second.read(epsg=first.get_epsg()) - first_intersects = first_df[first_df.geometry.touches( - second_df.geometry.unary_union)] - return first_intersects - - def calc_postgis(self): - """ - Calculates which features touch using PostGIS - - :return: result as a GeoDataFrame - """ - cross_queries = [] - cross_params = [] - first = self.inputs[0] - geom0, epsg = first.geom_column, first.epsg - geom1 = self.inputs[1].geom_column - for pg_io in self.inputs: - io_query, params = pg_io.get_query() - cross_queries.append(io_query.rstrip(';')) - cross_params.extend(params) - joinstr = ' AND ' if 'WHERE' in cross_queries[0].upper() else ' WHERE ' - query = '{query0} {join} (SELECT ST_Touches(ST_Transform(' \ - '{table}.{geom0},{epsg}), ST_Union(ST_Transform(' \ - 'q2.{geom1},{epsg}))) from ({query1}) as q2)'\ - .format(query0=cross_queries[0], join=joinstr, geom0=geom0, - geom1=geom1, epsg=epsg, query1=cross_queries[1], - table=first.table) - return df_from_postgis(first.engine, query, cross_params, geom0, epsg) - - def compute(self): - """ - Run the touches process - """ - input_classes = list(self.get_input_classes()) - use_postgis = (len(input_classes) == 1 and - input_classes[0] == 'PostgisIO') - data = self.calc_postgis() if use_postgis else self.calc_pandas() - self.output.data = data - self.output.write() - logger.debug(self.output) - - -class EqualsProcess(GaiaProcess): - """ - Calculates the features within the first vector dataset that are the same as - the features of the second vector dataset. 
- """ - - #: Tuple of required inputs; name, type , max # of each; None = no max - required_inputs = [ - {'description': 'First dataset', - 'type': types.VECTOR, - 'max': 1 - }, - {'description': 'Second dataset', - 'type': types.VECTOR, - 'max': 1 - } - ] - - #: Default output format - default_output = formats.JSON - - def __init__(self, **kwargs): - super(EqualsProcess, self).__init__(**kwargs) - if not self.output: - self.output = VectorFileIO(name='result', - uri=self.get_outpath()) - - def calc_pandas(self): - """ - Calculate which features are equal using pandas - - :return: result as a GeoDataFrame - """ - first, second = self.inputs[0], self.inputs[1] - first_df = first.read() - second_df = second.read(epsg=first.get_epsg()) - first_gs = first_df.geometry - first_length = len(first_gs) - second_gs = second_df.geometry - matches = np.empty(first_length) - for i, first_features in enumerate(first_gs): - matched = [first_features.equals(second_features) - for second_features in second_gs] - matches[i] = True if (True in matched) else False - output_df = GeoDataFrame.copy(first_df) - output_df['equals'] = matches - output_df = output_df[ - (output_df['equals'] == 1)].drop('equals', 1) - return output_df - - def calc_postgis(self): - """ - Calculate which features are equal using PostGIS - - :return: result as a GeoDataFrame - """ - equals_queries = [] - equals_params = [] - first = self.inputs[0] - geom0, epsg = first.geom_column, first.epsg - geom1 = self.inputs[1].geom_column - for pg_io in self.inputs: - io_query, params = pg_io.get_query() - equals_queries.append(io_query.rstrip(';')) - equals_params.extend(params) - joinstr = ' AND ' if 'WHERE' in equals_queries[0].upper() else ' WHERE ' - query = '{query0} {join} {geom0} IN (SELECT {geom1} ' \ - 'FROM ({query1}) as second)'.format(query0=equals_queries[0], - query1=equals_queries[1], - join=joinstr, - geom0=geom0, - geom1=geom1) - logger.debug(query) - return df_from_postgis(first.engine, query, equals_params, geom0, epsg) - - def compute(self): - """ - Run the process - """ - input_classes = list(self.get_input_classes()) - use_postgis = (len(input_classes) == 1 and - input_classes[0] == 'PostgisIO') - data = self.calc_postgis() if use_postgis else self.calc_pandas() - self.output.data = data - self.output.write() - logger.debug(self.output) - - -class ZonalStatsProcess(GaiaProcess): - """ - Calculates statistical values from a raster dataset for each polygon - in a vector dataset. 
- """ - - #: Tuple of required inputs; name, type , max # of each; None = no max - required_inputs = [ - {'description': 'Raster image', - 'type': types.RASTER, - 'max': 1 - }, - {'description': 'Zones', - 'type': types.VECTOR, - 'max': 1 - } - ] - - #: Default output format - default_output = formats.JSON - - def __init__(self, **kwargs): - super(ZonalStatsProcess, self).__init__(**kwargs) - if not self.output: - self.output = VectorFileIO(name='result', - uri=self.get_outpath()) - - def compute(self): - """ - Run the process - """ - self.output.create_output_dir(self.output.uri) - features = gdal_zonalstats( - self.inputs[1].read(format=formats.JSON, - epsg=self.inputs[0].get_epsg()), - self.inputs[0].read()) - self.output.data = GeoDataFrame.from_features(features) - self.output.write() diff --git a/gaia/inputs.py b/gaia/inputs.py deleted file mode 100644 index 220b31a..0000000 --- a/gaia/inputs.py +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -############################################################################### -# Copyright Kitware Inc. and Epidemico Inc. -# -# Licensed under the Apache License, Version 2.0 ( the "License" ); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -############################################################################### -import json -import os -import errno -import shutil -from fiona import crs as fiona_crs -import gaia -from gaia import GaiaException, get_abspath - -try: - import osr -except ImportError: - from osgeo import osr -import gaia.formats as formats - - -class MissingParameterError(GaiaException): - """Raise when a required parameter is missing""" - pass - - -class MissingDataException(GaiaException): - """Raise when required data is missing""" - pass - - -class UnsupportedFormatException(GaiaException): - """Raise when an unsupported data format is used""" - pass - - -class GaiaIO(object): - """Abstract IO class for importing/exporting data from a certain source""" - data = None - filters = None - - type = None - format = None - default_output = None - - def __init__(self, **kwargs): - """ - Create a GaiaIO object, assigning attributes based on kwargs - - :param kwargs: Keyword arguments - """ - for k, v in kwargs.items(): - setattr(self, k, v) - - def read(self, *args, **kwargs): - """ - Abstract method for reading data - - :param args: Required arguments - :param kwargs: Keyword arguments - """ - raise NotImplementedError() - - def write(self, *args, **kwargs): - """ - Abstract method for writing data - - :param args: Required arguments - :param kwargs: Keyword arguments - """ - pass - - def create_output_dir(self, filepath): - """ - Create an output directory if it doesn't exist - - :param filepath: Directory to create - """ - dirname = os.path.dirname(filepath) - if dirname and not os.path.exists(dirname): - try: - os.makedirs(os.path.dirname(filepath)) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - - def get_epsg(self): - """ - Get the EPSG code of the data - - :return: EPSG code (integer) - """ - if self.data is None: - self.read() - if 
self.data.__class__.__name__ == 'GeoDataFrame': - if self.data.crs is None: - # Make educated guess about projection based on longitude coords - minx = min(self.data.geometry.bounds['minx']) - maxx = max(self.data.geometry.bounds['maxx']) - if minx >= -180.0 and maxx <= 180.0: - self.epsg = 4326 - self.data.crs = fiona_crs.from_epsg(self.epsg) - elif minx >= -20026376.39 and maxx <= 20026376.39: - self.epsg = 3857 - self.data.crs = fiona_crs.from_epsg(self.epsg) - else: - raise GaiaException('Could not determine data projection.') - return self.epsg - else: - crs = self.data.crs.get('init', None) - if crs: - if ':' in crs: - crs = crs.split(':')[1] - if crs.isdigit(): - self.epsg = int(crs) - return self.epsg - # Assume EPSG:4326 - self.epsg = 4326 - self.data.crs = fiona_crs.from_epsg(self.epsg) - return self.epsg - else: - # Assume EPSG:4326 - self.epsg = 4326 - self.data.crs = fiona_crs.from_epsg(self.epsg) - return self.epsg - elif self.data.__class__.__name__ == 'Dataset': - projection = self.data.GetProjection() - data_crs = osr.SpatialReference(wkt=projection) - try: - self.epsg = int(data_crs.GetAttrValue('AUTHORITY', 1)) - return self.epsg - except KeyError: - raise GaiaException("EPSG code coud not be determined") - - def delete(self): - """ - Abstract method for deleting the IO source - """ - raise NotImplementedError() - - -class FileIO(GaiaIO): - """Abstract class to read and write file data.""" - - def __init__(self, uri='', **kwargs): - """ - :param uri: Filepath of IO object - :param kwargs: - :return: - """ - if uri and not self.allowed_folder(uri): - raise GaiaException( - "Access to this directory is not permitted : {}".format( - os.path.dirname(uri))) - self.uri = uri - super(FileIO, self).__init__(uri=uri, **kwargs) - if self.uri: - self.ext = os.path.splitext(self.uri)[1].lower() - - def allowed_folder(self, folder): - """ - Return true or false if folder is in list of - allowed folders from config - - :param folder: folder to check - :return: True or False - """ - allowed_dirs = gaia.config['gaia']['fileio_paths'].split(',') - if not allowed_dirs[0] or allowed_dirs[0] == '': - return True - filepath = os.path.abspath(os.path.dirname(folder)) - allowed = False - for path in allowed_dirs: - if filepath.startswith(get_abspath(path)): - allowed = True - break - return allowed - - def delete(self): - """ - Remove file of IO object - - :return: None - """ - if os.path.exists(self.uri): - shutil.rmtree(os.path.dirname(self.uri)) - - -class JsonFileIO(FileIO): - """Read json and write json file data (such as .json)""" - - default_output = formats.JSON - - def read(self, format=formats.JSON): - """ - Load JSON data into a python object - - :param format: input format - :return: Python dict object - """ - if self.ext not in formats.JSON: - raise UnsupportedFormatException( - "Only the following weight formats are supported: {}".format( - ','.join(formats.JSON) - ) - ) - if self.data is None: - with open(self.uri, 'r') as f: - self.data = json.load(f) - return self.data - - def write(self, filename=None, as_type='json'): - """ - Write data (assumed dictionary object) to json file - - :param filename: Base filename - :param as_type: json - :return: location of file - """ - if not filename: - filename = self.uri - self.create_output_dir(filename) - if as_type == 'json': - with open(filename, 'w') as f: - json.dump(self.data, f) - else: - raise NotImplementedError('{} not a valid type'.format(as_type)) - return self.uri diff --git a/gaia/io/__init__.py b/gaia/io/__init__.py new 
file mode 100644
index 0000000..e69de29
diff --git a/gaia/io/gaia_reader.py b/gaia/io/gaia_reader.py
new file mode 100644
index 0000000..3c7deae
--- /dev/null
+++ b/gaia/io/gaia_reader.py
@@ -0,0 +1,97 @@
+from __future__ import absolute_import, division, print_function
+from builtins import (
+    bytes, str, open, super, range, zip, round, input, int, pow, object
+)
+from future.utils import with_metaclass
+
+from gaia.gaia_data import GaiaDataObject
+from gaia import GaiaException
+from gaia.util import (
+    MissingParameterError,
+    MissingDataException,
+    UnsupportedFormatException,
+    get_uri_extension
+)
+
+
+class GaiaReaderFactoryMetaclass(type):
+    """
+    This is the metaclass for any type deriving from GaiaReader, providing
+    us with a registry of possible readers, which we can use to look for an
+    appropriate subtype at runtime, based on constructor args.
+    """
+    _registry = {}
+
+    def __new__(cls, clsname, bases, dct):
+        """
+        Make sure we include every GaiaReader subtype in our registry.
+        """
+        classtoreturn = super(GaiaReaderFactoryMetaclass,
+                              cls).__new__(cls, clsname, bases, dct)
+        GaiaReaderFactoryMetaclass._registry[clsname] = classtoreturn
+        return classtoreturn
+
+    def __call__(cls, *args, **kwargs):
+        """
+        Intercept the constructor arguments when attempting to instantiate
+        a reader.  If we aren't in here via direct use of the GaiaReader
+        class itself, then the developer probably knows what specific
+        subtype she wants, so we make sure she gets that.  Otherwise, we
+        use a heuristic to choose the subtype we construct.  If we see a
+        'reader_class' keyword argument, we try to choose that class.  If
+        not, we iterate through all the registered readers, looking for a
+        subclass with a static "can_read" method which returns true given
+        the specific constructor arguments we got.  Currently we construct
+        the first one of these we find.
+        """
+        registry = GaiaReaderFactoryMetaclass._registry
+        subclass = None
+        instance = None
+
+        if id(cls) != id(GaiaReader):
+            # Allow for direct subclass instantiation
+            instance = cls.__new__(cls, args, kwargs)
+        else:
+            if 'reader_class' in kwargs:
+                classname = kwargs['reader_class']
+                if classname in registry:
+                    subclass = registry[classname]
+            else:
+                for classname, classinstance in registry.items():
+                    if hasattr(classinstance, 'can_read'):
+                        canReadMethod = getattr(classinstance, 'can_read')
+                        if canReadMethod(*args, **kwargs):
+                            subclass = classinstance
+                            # FIXME:
+                            break
+
+            if subclass:
+                instance = subclass.__new__(subclass, args, kwargs)
+            else:
+                argsstr = 'args: %s, kwargs: %s' % (args, kwargs)
+                msg = 'Unable to find GaiaReader subclass for: %s' % argsstr
+                raise GaiaException(msg)
+
+        if instance is not None:
+            instance.__init__(*args, **kwargs)
+
+        return instance
+
+
+class GaiaReader(with_metaclass(GaiaReaderFactoryMetaclass, object)):
+    """
+    Abstract base class, root of the reader class hierarchy.
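+
+    Thanks to the factory metaclass, instantiating this base class
+    dispatches to a registered subclass.  Illustrative sketch (the file
+    name here is hypothetical):
+
+        reader = GaiaReader(uri='/tmp/districts.geojson')
+        # -> a GaiaGeoJSONReader, selected via its can_read() method
+        data_object = reader.read()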
+ """ + def __init__(self, *args, **kwargs): + pass + + """ + Return a GaiaDataObject + """ + def read(self, format=None, epsg=None): + return GaiaDataObject(reader=self) + + def load_metadata(self, dataObject): + print('GaiaReader _load_metadata()') + + def load_data(self, dataObject): + print('GaiaReader _load_data()') diff --git a/gaia/io/gdal_reader.py b/gaia/io/gdal_reader.py new file mode 100644 index 0000000..f7c3b23 --- /dev/null +++ b/gaia/io/gdal_reader.py @@ -0,0 +1,114 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +import os + +try: + import osr +except ImportError: + from osgeo import osr + +import gdal +from gaia.geo.gdal_functions import ( + gdal_reproject, + raster_to_numpy_array, + rasterio_bbox, + rasterio_footprint +) + +from gaia.io.gaia_reader import GaiaReader +from gaia.gaia_data import GDALDataObject +from gaia.util import ( + UnhandledOperationException, + UnsupportedFormatException, + get_uri_extension +) +import gaia.formats as formats +import gaia.types as types + + +class GaiaGDALReader(GaiaReader): + """ + A specific subclass for reading GDAL files + """ + def __init__(self, *args, **kwargs): + super(GaiaGDALReader, self).__init__(*args, **kwargs) + + if 'uri' in kwargs: + self.uri = kwargs['uri'] + self.ext = '.%s' % get_uri_extension(self.uri) + + self.as_numpy_array = False + self.as_single_band = True + self.old_nodata = None + self.new_nodata = None + + @staticmethod + def can_read(*args, **kwargs): + if 'uri' in kwargs: + extension = get_uri_extension(kwargs['uri']) + if extension == 'tif' or extension == 'tiff': + return True + return False + + def read(self, format=formats.RASTER, epsg=None, as_numpy_array=False, + as_single_band=True, old_nodata=None, new_nodata=None): + """ + Read data from a raster dataset + + :param as_numpy_array: Output data as numpy + (default is False i.e. raster osgeo.gdal.Dataset) + :param as_single_band: Output data as 2D array of its first band + (default is True). If False, returns full 3D array. + :param old_nodata: Explicitly identify existing NoData values + (default None). If None, attempts to get existing NoData values stored + in the raster band. + :param new_nodata: Replace NoData values in each band with new_nodata + (default None). If new_nodata is not None but old_nodata is None + and no existing NoData value is stored in the band, uses unchanged + default ReadAsArray() return values. + :param epsg: EPSG code to reproject data to + :return: GDAL Dataset + """ + self.format = format + self.epsg = epsg + self.as_numpy_array = as_numpy_array + self.as_single_band = as_single_band + self.old_nodata = old_nodata + self.new_nodata = new_nodata + + # FIXME: if we got "as_numpy_array=True", should we return different + # data object type? 
+ o = GDALDataObject(reader=self, dataFormat=self.format, epsg=self.epsg) + return o + + def load_metadata(self, dataObject): + self.__read_internal(dataObject) + + def load_data(self, dataObject): + self.__read_internal(dataObject) + + def __read_internal(self, dataObject): + if self.ext not in formats.RASTER: + raise UnsupportedFormatException( + "Only the following raster formats are supported: {}".format( + ','.join(formats.RASTER) + ) + ) + self.basename = os.path.basename(self.uri) + + dataObject.set_data(gdal.Open(self.uri)) + + if self.epsg and dataObject.get_epsg() != self.epsg: + dataObject.reproject(self.epsg) + + dataObject.set_metadata({}) + dataObject.datatype = types.RASTER + dataObject.dataformat = formats.RASTER + + if self.as_numpy_array: + raise UnhandledOperationException('Convert GDAL dataset to numpy') + # np_data = raster_to_numpy_array(out_data, as_single_band, + # old_nodata, new_nodata) diff --git a/gaia/io/geojson_reader.py b/gaia/io/geojson_reader.py new file mode 100644 index 0000000..a5e89d6 --- /dev/null +++ b/gaia/io/geojson_reader.py @@ -0,0 +1,83 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +import re +import geopandas + +from gaia.io.readers import GaiaReader +from gaia.gaia_data import GaiaDataObject +from gaia import GaiaException +from gaia.util import ( + MissingParameterError, + MissingDataException, + UnsupportedFormatException, + get_uri_extension +) +import gaia.formats as formats +import gaia.types as types + + +class GaiaGeoJSONReader(GaiaReader): + """ + Another specific subclass for reading GeoJSON + """ + epsgRegex = re.compile('epsg:([\d]+)') + + def __init__(self, *args, **kwargs): + super(GaiaGeoJSONReader, self).__init__(*args, **kwargs) + + if 'uri' in kwargs: + self.uri = kwargs['uri'] + self.ext = '.%s' % get_uri_extension(self.uri) + + @staticmethod + def can_read(*args, **kwargs): + if 'uri' in kwargs: + extension = get_uri_extension(kwargs['uri']) + if extension == 'geojson': + return True + return False + + def read(self, format=None, epsg=None): + return super().read(format, epsg) + + def load_metadata(self, dataObject): + self.__read_internal(dataObject) + + def load_data(self, dataObject): + self.__read_internal(dataObject) + + def __read_internal(self, dataObject): + # FIXME: need to handle format + # if not self.format: + # self.format = self.default_output + + # FIXME: Should this check actually go into the can_read method? 
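+        # Note the asymmetry: can_read() above only accepts the 'geojson'
+        # extension, while this check accepts anything in formats.VECTOR,
+        # so the two can disagree when a reader_class is forced through
+        # the factory.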
+ if self.ext not in formats.VECTOR: + raise UnsupportedFormatException( + "Only the following vector formats are supported: {}".format( + ','.join(formats.VECTOR) + ) + ) + + data = geopandas.read_file(self.uri) + + # FIXME: still need to handle filtering + # if self.filters: + # self.filter_data() + + # FIXME: skipped the transformation step for now + # return self.transform_data(format, epsg) + + # do the actual reading and set both data and metadata + # on the dataObject parameter + dataObject.set_metadata({}) + dataObject.set_data(data) + epsgString = data.crs['init'] + m = self.epsgRegex.search(epsgString) + if m: + dataObject._epsg = int(m.group(1)) + dataObject.datatype = types.VECTOR + dataObject.dataformat = formats.VECTOR diff --git a/gaia/io/postgis_reader.py b/gaia/io/postgis_reader.py new file mode 100644 index 0000000..2e52d34 --- /dev/null +++ b/gaia/io/postgis_reader.py @@ -0,0 +1,48 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +from gaia.gaia_data import PostgisDataObject +from gaia.io.readers import GaiaReader + +import gaia.types as types +import gaia.formats as formats + + +class GaiaPostGISReader(GaiaReader): + required_arguments = ['table', 'dbname', 'hostname', 'user', 'password'] + + def __init__(self, *args, **kwargs): + super(GaiaPostGISReader, self).__init__(*args, **kwargs) + self._args = args + self._kwargs = kwargs + + def read(self, format=None, epsg=None): + print('GaiaPostGISReader read()') + dataObject = PostgisDataObject(reader=self) + dataObject.format = format + dataObject.epsg = epsg + return dataObject + + def load_metadata(self, dataObject): + self.__set_db_properties(dataObject) + + def load_data(self, dataObject): + self.__set_db_properties(dataObject) + + def __set_db_properties(self, dataObject): + for key in self.required_arguments: + if key in self._kwargs: + print(' Setting property %s to %s' % (key, self._kwargs[key])) + setattr(dataObject, key, self._kwargs[key]) + dataObject.initialize_engine() + dataObject.datatype = types.VECTOR + dataObject.dataformat = formats.VECTOR + + @staticmethod + def can_read(*args, **kwargs): + for arg in GaiaPostGISReader.required_arguments: + if arg not in kwargs: + return False + return True diff --git a/gaia/io/readers.py b/gaia/io/readers.py new file mode 100644 index 0000000..ac809ba --- /dev/null +++ b/gaia/io/readers.py @@ -0,0 +1,3 @@ +from gaia.io.gaia_reader import GaiaReader +from gaia.io.geojson_reader import GaiaGeoJSONReader +from gaia.io.gdal_reader import GaiaGDALReader diff --git a/gaia/preprocess/__init__.py b/gaia/preprocess/__init__.py new file mode 100644 index 0000000..df9e3ed --- /dev/null +++ b/gaia/preprocess/__init__.py @@ -0,0 +1,22 @@ + +from gaia.preprocess.pandas_processes import * +from gaia.preprocess.gdal_processes import * + +from gaia.process_registry import compute + + +# def crop (inputs=[], args={}): +def crop (*args, **kwargs): + return compute('crop', inputs=list(args), args=kwargs) + +# def centroid(inputs=[], args={}): +# return compute('centroid', inputs=inputs, args=args) + +# def intersection(inputs=[], args={}): +# return compute('intersection', inputs=inputs, args=args) + +# def within(inputs=[], args=[]): +# return compute('within', inputs=inputs, args=args) + +# def subset(inputs=[], args=[]): +# return compute('subset', inputs=inputs, args=args) diff --git a/gaia/preprocess/gdal_processes.py b/gaia/preprocess/gdal_processes.py new file mode 
100644 index 0000000..62d4700 --- /dev/null +++ b/gaia/preprocess/gdal_processes.py @@ -0,0 +1,46 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +from gaia import GaiaException +from gaia.gaia_data import GDALDataObject +from gaia.validators import validate_subset +from gaia.process_registry import register_process +from gaia.geo.gdal_functions import gdal_clip + + +def validate_gdal(v): + """ + Rely on the base validate method for the bulk of the work, just make + sure the inputs are gdal-compatible. + """ + def validator(inputs=[], args=[]): + # FIXME: we should check we have a specific gdal type input also + return v(inputs, args) + return validator + + +@register_process('crop') +@validate_subset +@validate_gdal +def compute_subset_gdal(inputs=[], args=[]): + """ + Runs the subset computation, creating a raster dataset as output. + """ + raster, clip = inputs[0], inputs[1] + raster_img = raster.get_data() + + if clip.get_epsg() != raster.get_epsg(): + clip.reproject(raster.get_epsg()) + + clip_json = clip.get_data().geometry.unary_union.__geo_interface__ + + # Passing "None" as second arg instead of a file path. This tells gdal_clip + # not to write the output dataset to a tiff file on disk + output_dataset = gdal_clip(raster_img, None, clip_json) + + outputDataObject = GDALDataObject() + outputDataObject.set_data(output_dataset) + + return outputDataObject diff --git a/gaia/preprocess/pandas_processes.py b/gaia/preprocess/pandas_processes.py new file mode 100644 index 0000000..be5d5a5 --- /dev/null +++ b/gaia/preprocess/pandas_processes.py @@ -0,0 +1,98 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +import gaia.validators as validators +from gaia.process_registry import register_process +from gaia import GaiaException +from gaia.gaia_data import GaiaDataObject + +from geopandas import GeoDataFrame +from geopandas import GeoSeries + + +def validate_pandas(v): + """ + Since the base validate method we import does most of the work in a fairly + generic way, this function only needs to add a little bit to that: make + sure the inputs contain geopandas dataframe. Additionally, all the + processes defined in this module can re-use the same validate method. 
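+
+    Note on ordering, inferred from the decorator stack on crop_pandas
+    below: because this decorator is applied closest to the compute
+    method, the generic validate_base checks run first and the
+    GeoDataFrame type check here runs last, immediately before the
+    computation itself.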
+ """ + def validator(inputs=[], args={}): + # First should check if input is compatible w/ pandas computation + if type(inputs[0].get_data()) is not GeoDataFrame: + raise GaiaException('pandas process requires a GeoDataFrame') + + # Otherwise call up the chain to let parent do common validation + return v(inputs, args) + + return validator + + +@register_process('crop') +@validators.validate_within +@validate_pandas +def crop_pandas(inputs=[], args={}): + """ + Calculate the within process using pandas GeoDataFrames + + :return: within result as a GeoDataFrame + """ + first, second = inputs[0], inputs[1] + if first.get_epsg() != second.get_epsg(): + second.reproject(epsg=first.get_epsg()) + first_df, second_df = first.get_data(), second.get_data() + first_within = first_df[first_df.geometry.within( + second_df.geometry.unary_union)] + + outputDataObject = GaiaDataObject() + outputDataObject.set_data(first_within) + return outputDataObject + + +# """ +# These methods can be very small and focused on doing only one thing: given +# an array of inputs and possibly some arguments, do the computation and +# return something (maybe a GaiaDataObject, or perhaps just a number) +# """ +# @register_process('centroid') +# @validate_centroid +# @validate_pandas +# def compute_centroid_pandas(inputs=[], args={}): +# """ +# Calculate the centroid using pandas GeoDataFrames + +# :return: centroid as a GeoDataFrame +# """ + +# # Only worry about a pandas version of the compute method +# print('compute_centroid_pandas') + +# df_in = inputs[0].get_data() +# df = GeoDataFrame(df_in.copy(), geometry=df_in.geometry.name) +# if 'combined' in args and args['combined']: +# gs = GeoSeries(df.geometry.unary_union.centroid, +# name=df_in.geometry.name) +# output_data = GeoDataFrame(gs) +# else: +# df[df.geometry.name] = df.geometry.centroid +# output_data = df + +# # Now processes need to create and return a GaiaDataObject, whose +# # "data" member contains the actual data +# outputDataObject = GaiaDataObject() +# outputDataObject.set_data(output_data) + +# return outputDataObject + + +# """ +# Do a pandas intersection +# """ +# @register_process('intersection') +# @validate_intersection +# @validate_pandas +# def compute_intersection_pandas(inputs=[], args={}): +# print('compute_intersection_pandas') +# return None diff --git a/gaia/process_registry.py b/gaia/process_registry.py new file mode 100644 index 0000000..3b256b5 --- /dev/null +++ b/gaia/process_registry.py @@ -0,0 +1,82 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +from gaia import GaiaException + + +""" +A process is just two stateless methods, 'validate' and 'compute', +associated with a process name. +""" + +""" +By making this is a module-level variable, anyone who imports the module +has access to the single, global registry. This avoids the need for some +bootstrapping code that instantiates a single registry and makes it +available to the whole application. +""" +__process_registry = {} + + +def find_processes(processName): + """ + Return a list of registry entries that implement the named process. 
+ """ + if processName in __process_registry: + return __process_registry[processName] + return None + + +def register_process(processName): + """ + Return a process registration decorator + """ + def processRegistrationDecorator(computeMethod): + if processName not in __process_registry: + __process_registry[processName] = [] + __process_registry[processName].append(computeMethod) + return computeMethod + return processRegistrationDecorator + + +def list_processes(processName=None): + """ + Display a list of the processes in the registry, for debugging or + informational purposes. + """ + def display_processes(name, plist): + print('%s processes:' % name) + for item in plist: + print(item) + + if processName is not None: + if processName in __process_registry: + display_processes(processName, __process_registry[processName]) + else: + print('No processes registered for %s' % processName) + else: + for pName in __process_registry: + display_processes(pName, __process_registry[pName]) + + +def compute(processName, inputs, args): + """ + Just looks up a process that can do the job and asks it to 'compute' + """ + processes = find_processes(processName) + + if not processes: + list_processes(processName) + raise GaiaException('Unable to find suitable %s process' % processName) + + for p in processes: + # How will we choose between equally "valid" processes? For now + # just return the first one. + try: + return p(inputs, args) + except GaiaException: + pass + + raise GaiaException('No registered processes were able to validate inputs') diff --git a/gaia/util.py b/gaia/util.py new file mode 100644 index 0000000..8c13fee --- /dev/null +++ b/gaia/util.py @@ -0,0 +1,36 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +from gaia import GaiaException + + +sqlengines = {} + + +class MissingParameterError(GaiaException): + """Raise when a required parameter is missing""" + pass + + +class MissingDataException(GaiaException): + """Raise when required data is missing""" + pass + + +class UnhandledOperationException(GaiaException): + """Raise when required data is missing""" + pass + + +class UnsupportedFormatException(GaiaException): + """Raise when an unsupported data format is used""" + pass + + +def get_uri_extension(uripath): + idx = uripath.rfind('.') + if idx >= 0: + return uripath[idx + 1:] + return None diff --git a/gaia/validate_base.py b/gaia/validate_base.py new file mode 100644 index 0000000..8f65f51 --- /dev/null +++ b/gaia/validate_base.py @@ -0,0 +1,76 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +from gaia import GaiaException +from gaia import formats +import gaia.types as types + + +def test_arg_type(args, arg, arg_type): + """ + Try to cast a process argument to its required type. Raise an + exception if not successful. + :param arg: The argument property + :param arg_type: The required argument type (int, str, etc) + """ + try: + arg_type(args[arg]) + except Exception: + raise GaiaException('Required argument {} must be of type {}' + .format(arg, arg_type)) + + +def validate_base(inputs, args, required_inputs=[], required_args=[], + optional_args=[]): + """ + Ensure that all required inputs and arguments are present. 
+ """ + input_types = [] + errors = [] + + for procInput in inputs: + inputDataType = procInput.datatype + if inputDataType == types.PROCESS: + for t in [i for i in dir(types) if not i.startswith("__")]: + if any((True for x in procInput.default_output if x in getattr( + formats, t, []))): + inputDataType = getattr(types, t) + break + input_types.append(inputDataType) + + for i, req_input in enumerate(required_inputs): + if i >= len(input_types): + errors.append("Not enough inputs for process") + elif req_input['type'] != input_types[i]: + errors.append("Input #{} is of incorrect type.".format(i+1)) + + if len(input_types) > len(required_inputs): + if (required_inputs[-1]['max'] is not None and + len(input_types) > len(required_inputs) + + required_inputs[-1]['max']-1): + errors.append("Incorrect # of inputs; expected {}".format( + len(required_inputs))) + else: + for i in range(len(required_inputs)-1, len(input_types)): + if input_types[i] != required_inputs[-1]['type']: + errors.append( + "Input #{} is of incorrect type.".format(i + 1)) + if errors: + raise GaiaException('\n'.join(errors)) + for item in required_args: + arg, arg_type = item['name'], item['type'] + if arg not in args or args[arg] is None: + raise GaiaException('Missing required argument {}'.format(arg)) + test_arg_type(args, arg, arg_type) + if 'options' in item and args[arg] not in item['options']: + raise GaiaException('Invalid value for {}'.format(item['name'])) + for item in optional_args: + arg, arg_type = item['name'], item['type'] + if arg in optional_args and optional_args[arg] is not None: + test_arg_type(optional_args, arg, arg_type) + argval = args[arg] + if 'options' in item and argval not in item['options']: + raise GaiaException( + 'Invalid value for {}'.format(item['name'])) diff --git a/gaia/validators.py b/gaia/validators.py new file mode 100644 index 0000000..2094794 --- /dev/null +++ b/gaia/validators.py @@ -0,0 +1,97 @@ +from __future__ import absolute_import, division, print_function +from builtins import ( + bytes, str, open, super, range, zip, round, input, int, pow, object +) + +import gaia.types as types +from gaia.validate_base import validate_base + + +# """ +# Decorator for validating centroid process inputs. This can be reused by both +# the pandas and postgis centroid implementations, while a different one would +# be required in the case of a gdal/raster centroid. 
+# """ +# def validate_centroid(v): +# def centroid_validator(inputs=[], args={}): +# required_inputs = [{ +# 'description': 'Line/Polygon dataset', +# 'type': types.VECTOR, +# 'max': 1 +# }] + +# optional_args = [{ +# 'name': 'combined', +# 'title': 'Combined', +# 'description': 'Get centroid of features (default False)', +# 'type': bool, +# }] + +# validate_base(inputs, args, required_inputs=required_inputs, +# optional_args=optional_args) +# return v(inputs, args) + +# return centroid_validator + + +# """ +# Decorator for validating intersection process inputs +# """ +# def validate_intersection(v): +# def intersection_validator(inputs=[], args={}): +# required_inputs=[{ +# 'description': 'Feature dataset', +# 'type': types.VECTOR, +# 'max': 1 +# },{ +# 'description': 'Intersect dataset', +# 'type': types.VECTOR, +# 'max': 1 +# }] + +# validate_base(inputs, args, required_inputs=required_inputs) +# return v(inputs, args) + +# return intersection_validator + + +def validate_within(v): + """ + Decorator for validating within process inputs + """ + def within_validator(inputs=[], args={}): + required_inputs = [{ + 'description': 'Feature dataset', + 'type': types.VECTOR, + 'max': 1 + }, { + 'description': 'Within dataset', + 'type': types.VECTOR, + 'max': 1 + }] + + validate_base(inputs, args, required_inputs=required_inputs) + return v(inputs, args) + + return within_validator + + +def validate_subset(v): + """ + Decorator for validating subset process inputs + """ + def subset_validator(inputs=[], args=[]): + required_inputs = [{ + 'description': 'Image to subset', + 'type': types.RASTER, + 'max': 1 + }, { + 'description': 'Subset area:', + 'type': types.VECTOR, + 'max': 1 + }] + + validate_base(inputs, args, required_inputs=required_inputs) + return v(inputs, args) + + return subset_validator diff --git a/requirements.txt b/requirements.txt index d9e6547..adb8c93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ pip>=6.0 # All main requirements must go here, optional sections # last until the next optional tag or to the end of the file. +future>=0.16.0 numpy>=1.10.0 six>=1.11.0 requests>=2.7.0 diff --git a/tests/cases/test_create.py b/tests/cases/test_create.py new file mode 100644 index 0000000..8ec8213 --- /dev/null +++ b/tests/cases/test_create.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +# Copyright Kitware Inc. and Epidemico Inc. +# +# Licensed under the Apache License, Version 2.0 ( the "License" ); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+###############################################################################
+import os
+import json
+import unittest
+from zipfile import ZipFile
+import gaia
+from gaia.preprocess import crop
+
+base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)))
+testfile_path = os.path.join(base_dir, '../data')
+
+
+class TestCreateAPI(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        config_file = os.path.join(base_dir, '../../gaia/conf/gaia.cfg')
+        gaia.get_config(config_file)
+
+    def test_create_api(self):
+        """
+        Test cropping (within process) for vector inputs
+        """
+        path1 = os.path.join(testfile_path, 'iraq_hospitals.geojson')
+        path2 = os.path.join(testfile_path, 'baghdad_districts.geojson')
+
+        data1 = gaia.create(uri=path1)
+        data2 = gaia.create(uri=path2)
+
+        output = crop(data1, data2)
+
+        self.assertEqual(len(output.get_data()), 19)
diff --git a/tests/cases/test_parser.py b/tests/cases/test_parser.py
deleted file mode 100644
index f69d6f6..0000000
--- a/tests/cases/test_parser.py
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-###############################################################################
-# Copyright Kitware Inc. and Epidemico Inc.
-#
-# Licensed under the Apache License, Version 2.0 ( the "License" );
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-############################################################################### -import json -import os -import unittest -from zipfile import ZipFile -from gaia import formats -from gaia.parser import deserialize - -testfile_path = os.path.join(os.path.dirname( - os.path.realpath(__file__)), '../data') - - -class TestGaiaRequestParser(unittest.TestCase): - """Tests for the GaiaRequestParser class""" - - def test_process_within(self): - """Test Within process with nested Buffer process""" - with open(os.path.join(testfile_path, - 'within_nested_buffer_process.json')) as inf: - body_text = inf.read().replace('{basepath}', testfile_path) - process = json.loads(body_text, object_hook=deserialize) - try: - process.compute() - output = json.loads(process.output.read(format=formats.JSON)) - with open(os.path.join( - testfile_path, - 'within_nested_buffer_process_result.json')) as exp: - expected_json = json.load(exp) - self.assertIn('features', output) - self.assertEquals(len(expected_json['features']), - len(output['features'])) - self.assertIsNotNone(process.id) - self.assertIn(process.id, process.output.uri) - finally: - if process: - process.purge() - - def test_process_intersects(self): - """Test Intersects process""" - with open(os.path.join(testfile_path, - 'intersects_process.json')) as inf: - body_text = inf.read().replace('{basepath}', testfile_path) - process = json.loads(body_text, object_hook=deserialize) - try: - process.compute() - output = json.loads(process.output.read(format=formats.JSON)) - with open(os.path.join( - testfile_path, - 'intersects_process_results.json')) as exp: - expected_json = json.load(exp) - self.assertIn('features', output) - self.assertEquals(len(expected_json['features']), - len(output['features'])) - finally: - if process: - process.purge() - - def test_process_disjoint(self): - """Test Disjoint Process""" - with open(os.path.join(testfile_path, - 'difference_process.json')) as inf: - body_text = inf.read().replace('{basepath}', testfile_path) - process = json.loads(body_text, object_hook=deserialize) - try: - process.compute() - output = json.loads(process.output.read(format=formats.JSON)) - with open(os.path.join( - testfile_path, - 'difference_process_results.json')) as exp: - expected_json = json.load(exp) - self.assertIn('features', output) - self.assertEquals(len(expected_json['features']), - len(output['features'])) - finally: - if process: - process.purge() - - def test_process_union(self): - """Test Union Process""" - with open(os.path.join(testfile_path, - 'union_process.json')) as inf: - body_text = inf.read().replace('{basepath}', testfile_path) - process = json.loads(body_text, object_hook=deserialize) - try: - process.compute() - output = json.loads(process.output.read(format=formats.JSON)) - with open(os.path.join( - testfile_path, - 'union_process_results.json')) as exp: - expected_json = json.load(exp) - self.assertIn('features', output) - self.assertEquals(len(expected_json['features']), - len(output['features'])) - finally: - if process: - process.purge() - - def test_process_centroid(self): - """Test Centroid Process""" - with open(os.path.join(testfile_path, - 'centroid_process.json')) as inf: - body_text = inf.read().replace('{basepath}', testfile_path) - process = json.loads(body_text, object_hook=deserialize) - try: - process.compute() - output = json.loads(process.output.read(format=formats.JSON)) - with open(os.path.join( - testfile_path, - 'centroid_process_results.json')) as exp: - expected_json = json.load(exp) - 
self.assertIn('features', output) - self.assertEquals(len(expected_json['features']), - len(output['features'])) - finally: - if process: - process.purge() - - def test_process_distance(self): - """Test Distance Process""" - with open(os.path.join(testfile_path, - 'distance_process.json')) as inf: - body_text = inf.read().replace('{basepath}', testfile_path) - process = json.loads(body_text, object_hook=deserialize) - try: - process.compute() - output = json.loads(process.output.read(format=formats.JSON)) - with open(os.path.join( - testfile_path, - 'distance_process_results.json')) as exp: - expected_json = json.load(exp) - self.assertIn('features', output) - self.assertEquals(len(expected_json['features']), - len(output['features'])) - finally: - if process: - process.purge() - - def test_process_subset_raster(self): - """Test raster subset process with a raster file and geojson file""" - with open(os.path.join( - testfile_path, 'raster_subset_process.json')) as inf: - body_text = inf.read().replace('{basepath}', testfile_path) - process = json.loads(body_text, object_hook=deserialize) - zipfile = ZipFile(os.path.join(testfile_path, '2states.zip'), 'r') - - try: - zipfile.extract('2states.geojson', testfile_path) - process.compute() - self.assertEquals(type(process.output.data).__name__, 'Dataset') - self.assertTrue(os.path.exists(process.output.uri)) - self.assertIsNotNone(process.id) - self.assertIn(process.id, process.output.uri) - finally: - testfile = os.path.join(testfile_path, '2states.geojson') - if os.path.exists(testfile): - os.remove(testfile) - if process: - process.purge() - - def test_process_within_featureio(self): - """Test Within process with nested Buffer process using geojson input""" - with open(os.path.join( - testfile_path, - 'within_nested_buffer_features_process.json')) as inf: - process = json.load(inf, object_hook=deserialize) - - try: - process.compute() - output = json.loads(process.output.read(format=formats.JSON)) - with open(os.path.join( - testfile_path, - 'within_nested_buffer_features_process_result.json')) as gj: - expected_json = json.load(gj) - self.assertIn('features', output) - self.assertEquals(len(expected_json['features']), - len(output['features'])) - self.assertIsNotNone(process.id) - self.assertIn(process.id, process.output.uri) - finally: - if process: - process.purge() diff --git a/tests/cases/test_postgis.py b/tests/cases/test_postgis.py deleted file mode 100644 index 91ae0df..0000000 --- a/tests/cases/test_postgis.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -############################################################################### -# Copyright Kitware Inc. and Epidemico Inc. -# -# Licensed under the Apache License, Version 2.0 ( the "License" ); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-############################################################################### -import codecs -import glob -import json -import unittest -import os -import sqlalchemy -from sqlalchemy.exc import OperationalError -from sqlalchemy.pool import NullPool -try: - from mock import patch -except ImportError: - from unittest.mock import patch -from sqlalchemy import create_engine, text -import gaia.geo -import gaia -import gaia.formats as formats -from gaia.geo.geo_inputs import PostgisIO - -base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__))) - - -def get_engine(self, connection_string): - if connection_string not in gaia.sqlengines: - gaia.sqlengines[connection_string] = create_engine( - self.get_connection_string(), poolclass=NullPool) - return gaia.sqlengines[connection_string] - - -class TestPostGisDB(unittest.TestCase): - - @classmethod - def get_connection(cls, dbname=''): - auth = cls.user - if cls.password: - auth = cls.user + ":" + cls.password - auth += '@' - - conn_string = 'postgresql://{auth}{host}{dbname}'.format( - auth=auth, - host=cls.host, - dbname='/' + dbname - ) - engine = sqlalchemy.create_engine(conn_string) - return engine.connect() - - @classmethod - def setUpClass(cls): - config_file = os.path.join(base_dir, '../conf/test.dist.cfg') - config = gaia.get_config(config_file) - cls.user = config['gaia_postgis']['user'] - cls.password = config['gaia_postgis']['password'] - cls.host = config['gaia_postgis']['host'] - cls.dbname = config['gaia_postgis']['dbname'] - - try: - connection = cls.get_connection() - except OperationalError: - raise unittest.SkipTest() - iso_level = connection.connection.connection.isolation_level - connection.connection.connection.set_isolation_level(0) - try: - connection.execute("DROP DATABASE IF EXISTS {}".format(cls.dbname)) - connection.execute("CREATE DATABASE {}".format(cls.dbname)) - connection.close() - connection.engine.dispose() - connection = cls.get_connection(cls.dbname) - connection.execute(text("CREATE EXTENSION postgis;"). 
- execution_options(autocommit=True)) - for sqlfile in glob.glob(os.path.join(base_dir, "../data/*.sql")): - with codecs.open(sqlfile, "r", "utf-8") as inf: - sql = inf.read() - connection.execute( - text(sql).execution_options(autocommit=True)) - finally: - connection.connection.connection.set_isolation_level(iso_level) - connection.close() - connection.engine.dispose() - - @classmethod - def tearDownClass(cls): - connection = cls.get_connection() - iso_level = connection.connection.connection.isolation_level - connection.connection.connection.set_isolation_level(0) - try: - connection.execute("DROP DATABASE IF EXISTS {}".format(cls.dbname)) - finally: - connection.connection.connection.set_isolation_level(iso_level) - connection.close() - connection.engine.dispose() - - @patch('gaia.geo.geo_inputs.PostgisIO.get_engine', get_engine) - def test_area_process(self): - pgio = PostgisIO(table='baghdad_districts', - host=self.host, - dbname=self.dbname, - user=self.user, - password=self.password, - filters=[('nname', 'LIKE', 'B%')]) - process = gaia.geo.AreaProcess(inputs=[pgio]) - try: - process.compute() - with open(os.path.join( - base_dir, - '../data/area_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read(format=formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - @patch('gaia.geo.geo_inputs.PostgisIO.get_engine', get_engine) - def test_within(self): - """ - Test WithinProcess for PostGIS inputs - """ - pg_hospitals = PostgisIO(table='iraq_hospitals', - host=self.host, - dbname=self.dbname, - user=self.user, - password=self.password) - pg_districts = PostgisIO(table='baghdad_districts', - host=self.host, - dbname=self.dbname, - user=self.user, - password=self.password, - filters=[('nname', 'LIKE', 'A%')]) - process = gaia.geo.WithinProcess(inputs=[pg_hospitals, pg_districts]) - try: - process.compute() - with open(os.path.join( - base_dir, - '../data/within_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read(format=formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() diff --git a/tests/cases/test_processes.py b/tests/cases/test_processes.py new file mode 100644 index 0000000..5249df1 --- /dev/null +++ b/tests/cases/test_processes.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +# Copyright Kitware Inc. and Epidemico Inc. +# +# Licensed under the Apache License, Version 2.0 ( the "License" ); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+###############################################################################
+import os
+import json
+import unittest
+from zipfile import ZipFile
+import gaia
+from gaia.preprocess import crop
+from gaia.io import readers
+
+base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)))
+testfile_path = os.path.join(base_dir, '../data')
+
+
+class TestGaiaProcesses(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        config_file = os.path.join(base_dir, '../../gaia/conf/gaia.cfg')
+        gaia.get_config(config_file)
+
+    def test_crop_pandas(self):
+        """
+        Test cropping (within process) for vector inputs
+        """
+        reader1 = readers.GaiaReader(
+            uri=os.path.join(testfile_path, 'iraq_hospitals.geojson'))
+
+        reader2 = readers.GaiaReader(
+            uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
+
+        output = crop(reader1.read(), reader2.read())
+
+        self.assertEqual(len(output.get_data()), 19)
+
+    def test_crop_gdal(self):
+        """
+        Test cropping (subset process) for vector & raster inputs
+        """
+        zipfile = ZipFile(os.path.join(testfile_path, '2states.zip'), 'r')
+        zipfile.extract('2states.geojson', testfile_path)
+
+        try:
+            reader1 = readers.GaiaReader(
+                uri=os.path.join(testfile_path, 'globalairtemp.tif'))
+            rasterData = reader1.read()
+
+            reader2 = readers.GaiaReader(
+                uri=os.path.join(testfile_path, '2states.geojson'))
+            vectorData = reader2.read()
+
+            output = crop(rasterData, vectorData)
+
+            self.assertEqual(type(output.get_data()).__name__, 'Dataset')
+        finally:
+            testfile = os.path.join(testfile_path, '2states.geojson')
+            if os.path.exists(testfile):
+                os.remove(testfile)
diff --git a/tests/cases/test_processors.py b/tests/cases/test_processors.py
deleted file mode 100644
index d4f9ff3..0000000
--- a/tests/cases/test_processors.py
+++ /dev/null
@@ -1,571 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-###############################################################################
-# Copyright Kitware Inc. and Epidemico Inc.
-#
-# Licensed under the Apache License, Version 2.0 ( the "License" );
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-############################################################################### -import os -import json -import unittest -from zipfile import ZipFile -import gaia -import gaia.geo as geo -from gaia.geo.geo_inputs import RasterFileIO, VectorFileIO, FeatureIO - -base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__))) -testfile_path = os.path.join(base_dir, '../data') - - -class TestGaiaProcessors(unittest.TestCase): - - @classmethod - def setUpClass(cls): - config_file = os.path.join(base_dir, '../../gaia/conf/gaia.cfg') - gaia.get_config(config_file) - - def test_zonalstats(self): - vector_io = FeatureIO(features=[ - {"type": "Feature", - "geometry": { - "type": "Polygon", - "coordinates": [ - [[100.0, 0.0], [120.0, 0.0], [120.0, 30.0], - [100.0, 30.0], [100.0, 0.0]] - ] - }, - "properties": { - "prop0": "value1", - "prop1": {"this": "that"} - } - }, - {"type": "Feature", - "geometry": { - "type": "Polygon", - "coordinates": [ - [[-100.0, 0.0], [-120.0, 0.0], [-120.0, -30.0], - [100.0, -30.0], [100.0, 0.0]] - ] - }, - "properties": { - "prop0": "value0", - "prop1": {"this": "other thing"} - } - }]) - raster_io = RasterFileIO(name='temp', uri=os.path.join( - testfile_path, 'globalairtemp.tif')) - process = geo.ZonalStatsProcess(inputs=[raster_io, vector_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'zonalstats_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - pass - if process: - process.purge() - - def test_within(self): - """ - Test WithinProcess for vector inputs - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_hospitals.geojson')) - vector2_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson')) - process = geo.WithinProcess(inputs=[vector1_io, vector2_io]) - try: - process.compute() - self.assertEquals(len(process.output.data), 19) - finally: - if process: - process.purge() - - def test_intersect(self): - """ - Test IntersectsProcess for vector inputs - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson')) - vector2_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_roads.geojson')) - process = geo.IntersectsProcess( - inputs=[vector1_io, vector2_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'intersects_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_cross(self): - """ - Test IntersectsProcess for vector inputs - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson')) - vector2_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_roads.geojson')) - process = geo.CrossesProcess( - inputs=[vector1_io, vector2_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'crosses_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_touch(self): - """ - Test IntersectsProcess for vector inputs - """ - vector1_io = 
VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson')) - vector2_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_roads.geojson')) - process = geo.TouchesProcess( - inputs=[vector1_io, vector2_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'touches_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_union(self): - """ - Test UnionProcess for vector inputs - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson'), - filters=[('NNAME', 'contains', '^A')]) - vector2_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson'), - filters=[('NNAME', 'contains', '^B')]) - process = geo.UnionProcess(inputs=[vector1_io, vector2_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'union_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_disjoint(self): - """ - Test DisjointProcess for vector inputs - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson')) - vector2_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_roads.geojson')) - process = geo.DisjointProcess(inputs=[vector1_io, vector2_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'difference_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_centroid(self): - """ - Test CentroidProcess for vector inputs - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson')) - process = geo.CentroidProcess(inputs=[vector1_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'centroid_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_distance(self): - """ - Test DistanceProcess for vector inputs - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson')) - vector2_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_hospitals.geojson')) - process = geo.DistanceProcess(inputs=[vector1_io, vector2_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'distance_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_raster_bbox(self): - raster_io = RasterFileIO( - uri=os.path.join(testfile_path, - 'satellite_test_data', - 'LC81070352015218LGN00_B5.TIF')) - - bbox = raster_io.get_bbox() - self.assertEquals(bbox, [307366.7298195886, 3869968.931078481, - 539974.1151507461, 
4107482.3190209507]) - - def test_raster_footprint(self): - raster_io = RasterFileIO( - uri=os.path.join(testfile_path, - 'satellite_test_data', - 'LC81070352015218LGN00_B5.TIF')) - - footprint = raster_io.get_footprint() - self.assertEquals(footprint, [(354404.6484867488, 4107482.3190209507), - (307366.7298195886, 3915415.660631991), - (492936.19648358587, 3869968.931078481), - (539974.1151507461, 4062035.5894674407), - (354404.6484867488, 4107482.3190209507)]) - - def test_subset_raster(self): - """ - Test SubsetProcess for vector & raster inputs - """ - zipfile = ZipFile(os.path.join(testfile_path, '2states.zip'), 'r') - zipfile.extract('2states.geojson', testfile_path) - - vector_io = VectorFileIO( - uri=os.path.join(testfile_path, '2states.geojson')) - raster_io = RasterFileIO( - uri=os.path.join(testfile_path, 'globalairtemp.tif')) - process = geo.SubsetProcess(inputs=[raster_io, vector_io]) - try: - process.compute() - self.assertEquals(type(process.output.data).__name__, 'Dataset') - self.assertTrue(os.path.exists(process.output.uri)) - self.assertIsNotNone(process.id) - self.assertIn(process.id, process.output.uri) - finally: - testfile = os.path.join(testfile_path, '2states.geojson') - if os.path.exists(testfile): - os.remove(testfile) - if process: - process.purge() - - def test_rastermath_add(self): - """ - Test adding two rasters together - """ - raster1_io = RasterFileIO( - name='A', uri=os.path.join(testfile_path, 'globalairtemp.tif')) - raster2_io = RasterFileIO( - name='B', uri=os.path.join(testfile_path, 'globalprecip.tif')) - calc = 'A + B' - bands = [1, 1] - - process = geo.RasterMathProcess( - inputs=[raster1_io, raster2_io], calc=calc, bands=bands) - try: - process.compute() - self.assertTrue(os.path.exists(process.output.uri)) - oraster, raster1, raster2 = [x.read() for x in ( - process.output, raster1_io, raster2_io)] - # Output raster should be same dimensions as raster 1 - self.assertEquals((oraster.RasterXSize, oraster.RasterYSize), - (raster1.RasterXSize, raster1.RasterYSize)) - orb, r1b, r2b = [x.GetRasterBand(1) - for x in (oraster, raster1, raster2)] - # Min value of output should be >= the max minimum of inputs - self.assertGreaterEqual(orb.GetStatistics(False, True)[0], - max(r1b.GetStatistics(False, True)[0], - r2b.GetStatistics(False, True)[0])) - - # Max value of output >= max(minimum)+min(maximum) of inputs - self.assertGreaterEqual(orb.GetStatistics(False, True)[1], - max(r1b.GetStatistics(False, True)[0], - r2b.GetStatistics(False, True)[0]) + - min(r1b.GetStatistics(False, True)[1], - r2b.GetStatistics(False, True)[1])) - finally: - if process: - process.purge() - - def test_rastermath_multiply_by_value(self): - """ - Test multiplying a raster by a value, - and specifying output type (Float32) - """ - raster1_io = RasterFileIO( - name='A', uri=os.path.join(testfile_path, 'globalprecip.tif')) - calc = 'A * 2' - output_type = 'Float32' - - process = geo.RasterMathProcess(inputs=[raster1_io, ], - calc=calc, - output_type=output_type) - try: - process.compute() - self.assertTrue(os.path.exists(process.output.uri)) - oraster, raster1 = [x.read() for x in (process.output, raster1_io)] - # Output raster should be same dimensions as raster 1 - self.assertEquals((oraster.RasterXSize, oraster.RasterYSize), - (raster1.RasterXSize, raster1.RasterYSize)) - orb, r1b = [x.GetRasterBand(1) for x in (oraster, raster1)] - # Maximum value of output should be 2x the max of input raster - self.assertEqual(orb.GetStatistics(False, True)[1], - r1b.GetStatistics(False, 
True)[1] * 2) - # Datatype of band should be Float32 (== gdal.GDT_Float32 == 6) - self.assertEquals(6, orb.DataType) - self.assertEquals(1.175494351E-38, orb.GetNoDataValue()) - - # Each pixel of output raster should equal 2x input raster - # unless it is a nodata value - ora, r1a = [x.ReadAsArray() for x in (orb, r1b)] - for x in range(raster1.RasterXSize): - for y in range(raster1.RasterYSize): - if r1a[y, x] != r1b.GetNoDataValue(): - self.assertEquals(ora[y, x], r1a[y, x] * 2) - finally: - if process: - process.purge() - - def test_rastermath_logical_operators(self): - """ - Test creation of a masked raster based on logical operators - """ - raster1_io = RasterFileIO( - name='A', uri=os.path.join(testfile_path, 'globalairtemp.tif')) - calc = 'logical_or(logical_and(A >= 27000, A <= 28000), ' \ - 'logical_and(A >= 30000, A <= 31000))' - - process = geo.RasterMathProcess(inputs=[raster1_io, ], calc=calc) - try: - process.compute() - self.assertTrue(os.path.exists(process.output.uri)) - oraster, raster1 = [x.read() for x in (process.output, raster1_io)] - # Output raster should be same dimensions as raster 1 - self.assertEquals((oraster.RasterXSize, oraster.RasterYSize), - (raster1.RasterXSize, raster1.RasterYSize)) - orb, r1b = [x.GetRasterBand(1) for x in (oraster, raster1)] - # Maximum value of output should be 1 - self.assertEqual(orb.GetStatistics(False, True)[1], 1) - # Minimum value of output should be 0 - self.assertEqual(orb.GetStatistics(False, True)[0], 0) - # Pixels should be 1 where source is between 27K-28K or 30-31K - ora, r1a = [x.ReadAsArray() for x in (orb, r1b)] - self.assertTrue(ora[90, 10] == 1 and r1a[90, 10] == 30083) - self.assertTrue(ora[160, 10] == 1 and r1a[160, 10] == 27074) - # Pixels should be 0 where source is not between 27K-28K or 30-31K - ora, r1a = [x.ReadAsArray() for x in (orb, r1b)] - self.assertTrue(ora[120, 10] == 0 and r1a[120, 10] == 29623) - self.assertTrue(ora[175, 10] == 0 and r1a[175, 10] == 23928) - finally: - if process: - process.purge() - - def test_length(self): - """ - Test LengthProcess for vector inputs - """ - vector_roads = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_roads.geojson'), - filters=[('type', '=', 'motorway'), ('bridge', '=', 1)]) - process = geo.LengthProcess(inputs=[vector_roads]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'length_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_area(self): - """ - Test AreaProcess for vector inputs - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson'), - filters=[('NNAME', 'contains', '^B')]) - process = geo.AreaProcess(inputs=[vector1_io]) - try: - process.compute() - with open(os.path.join( - testfile_path, - 'area_process_results.json')) as exp: - expected_json = json.load(exp) - actual_json = json.loads(process.output.read( - format=geo.formats.JSON)) - self.assertEquals(len(expected_json['features']), - len(actual_json['features'])) - finally: - if process: - process.purge() - - def test_validationInputsPass(self): - """ - Test the GaiaProcess.validate() function - pass on valid input - """ - raster_io = RasterFileIO(uri='/fake/path') - vector_io = VectorFileIO(uri='/fake/path') - try: - geo.ZonalStatsProcess(inputs=[raster_io, vector_io]) - except geo.GaiaException: - 
self.fail("ZonalProcess should have passed validation but did not") - - def test_validationInputsOrder(self): - """ - Test the GaiaProcess.validate() function - fail on incorrect order - """ - raster_iO = RasterFileIO(uri='/fake/path1') - vector_io = VectorFileIO(uri='/fake/path2') - - with self.assertRaises(geo.GaiaException) as ge: - geo.ZonalStatsProcess(inputs=[vector_io, raster_iO]) - self.assertIn('Input #1 is of incorrect type.', str(ge.exception)) - - def test_validationInputsMin(self): - """ - Test the GaiaProcess.validate() function - fail on < minimum input types - """ - - vector_io = VectorFileIO(uri='/fake/path1') - with self.assertRaises(geo.GaiaException) as ge: - geo.IntersectsProcess(inputs=[vector_io]) - self.assertIn('Not enough inputs for process', str(ge.exception)) - - def test_validationInputsNoMax(self): - """ - Test the GaiaProcess.validate() function - pass on no max input types - """ - - raster_io1 = RasterFileIO(uri='/fake/path1') - raster_io2 = RasterFileIO(uri='/fake/path2') - - try: - geo.RasterMathProcess(inputs=[raster_io1, raster_io2], calc='A+B') - except geo.GaiaException: - self.fail("Multiple inputs should have passed validation") - - def test_validationInputsMax(self): - """ - Test the GaiaProcess.validate() function - fail on > max input types - """ - - vector_io1 = VectorFileIO(uri='/fake/path') - vector_io2 = VectorFileIO(uri='/fake/path') - - with self.assertRaises(geo.GaiaException) as ge: - geo.LengthProcess(inputs=[vector_io1, vector_io2]) - self.assertIn('Incorrect # of inputs; expected 1', str(ge.exception)) - - def test_mercator_geojson(self): - vector_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_hospitals_3857.json')) - self.assertEquals(vector_io.get_epsg(), 3857) - jsonwm = json.loads(vector_io.read(format=geo.formats.JSON)) - self.assertEquals(jsonwm['crs']['properties']['name'], 'EPSG:3857') - self.assertEquals(jsonwm['features'][0]['geometry']['coordinates'], - [4940150.544527022, 3941210.867854486]) - json84 = json.loads(vector_io.read(format=geo.formats.JSON, epsg=4326)) - self.assertEquals(json84['crs']['properties']['name'], 'EPSG:4326') - self.assertEquals(json84['features'][0]['geometry']['coordinates'], - [44.378127400000004, 33.34517919999999]) - - def test_nocrs_wgs84_geojson(self): - vector_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_hospitals.geojson')) - raw_json = json.loads(vector_io.read(format=geo.formats.JSON)) - self.assertFalse(hasattr(raw_json, 'crs')) - epsg = vector_io.get_epsg() - self.assertEquals(epsg, 4326) - - def test_nocrs_mercator_geojson(self): - with open(os.path.join(testfile_path, - 'iraq_hospitals_3857.json')) as injson: - json3857 = json.load(injson) - feature_io = FeatureIO(features=json3857['features']) - epsg = feature_io.get_epsg() - self.assertEquals(epsg, 3857) - feature_json = feature_io.read(format=geo.formats.JSON) - self.assertFalse(hasattr(feature_json, 'crs')) - - def test_within_reproject(self): - """ - Test WithinProcess for vector inputs, where output should be in - same projection as first input (in this case, 3857). - """ - vector1_io = VectorFileIO( - uri=os.path.join(testfile_path, 'iraq_hospitals_3857.json')) - vector2_io = VectorFileIO( - uri=os.path.join(testfile_path, 'baghdad_districts.geojson')) - process = geo.WithinProcess(inputs=[vector1_io, vector2_io]) - try: - process.compute() - self.assertEquals(process.output.data.crs, {'init': u'epsg:3857'}) - self.assertEquals(len(process.output.data), 19) - finally: - if process: - process.purge()
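Reviewer note: the Process/IO-based tests deleted above (WithinProcess, SubsetProcess, and friends) are superseded by the reader-plus-function API exercised in the new tests/cases/test_processes.py. For anyone porting the remaining deleted coverage, here is a minimal sketch of the mapping; the fixture path is an assumption (checkout-relative), and the old-style calls are quoted from the deleted tests:

import os
from gaia.io import readers
from gaia.preprocess import crop

testfile_path = 'tests/data'  # assumed location of the sample data files

# Old style (deleted above):
#     v1 = VectorFileIO(uri='<...>/iraq_hospitals.geojson')
#     v2 = VectorFileIO(uri='<...>/baghdad_districts.geojson')
#     process = geo.WithinProcess(inputs=[v1, v2])
#     process.compute()
#     result = process.output.data

# New style (added in tests/cases/test_processes.py):
hospitals = readers.GaiaReader(
    uri=os.path.join(testfile_path, 'iraq_hospitals.geojson')).read()
districts = readers.GaiaReader(
    uri=os.path.join(testfile_path, 'baghdad_districts.geojson')).read()

cropped = crop(hospitals, districts)  # replaces WithinProcess/SubsetProcess
print(len(cropped.get_data()))        # 19 features for this sample data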
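One nit on the new test_crop_gdal: the ZipFile handle is opened but never closed, so the archive stays open if extract() raises. ZipFile supports the context-manager protocol (Python 2.7+/3.2+), which keeps the cleanup automatic; a minimal sketch of how that setup step could read:

from zipfile import ZipFile

# Extract the fixture and close the archive deterministically,
# even if extract() raises.
with ZipFile(os.path.join(testfile_path, '2states.zip'), 'r') as zf:
    zf.extract('2states.geojson', testfile_path)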
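If PostGIS coverage is reinstated later, the engine-patching pattern from the deleted test_postgis.py is worth carrying over: the tests patched PostgisIO.get_engine with a NullPool variant so that no pooled connection kept the scratch database alive when tearDownClass ran DROP DATABASE. The helper below is copied from the deleted file; applying it to whatever replaces PostgisIO is an assumption:

from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool
import gaia

def get_engine(self, connection_string):
    # NullPool closes connections as soon as they are released, so the
    # teardown's DROP DATABASE is never blocked by a pooled connection.
    if connection_string not in gaia.sqlengines:
        gaia.sqlengines[connection_string] = create_engine(
            self.get_connection_string(), poolclass=NullPool)
    return gaia.sqlengines[connection_string]

# Applied per test method in the deleted file:
#     @patch('gaia.geo.geo_inputs.PostgisIO.get_engine', get_engine)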