-
Notifications
You must be signed in to change notification settings - Fork 160
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #56 from Esri/genericudf
Generic UDF and performance improvements
- Loading branch information
Showing
16 changed files
with
342 additions
and
268 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
hive/src/main/java/com/esri/hadoop/hive/HiveGeometryOIHelper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
package com.esri.hadoop.hive; | ||
|
||
import org.apache.hadoop.hive.ql.exec.UDFArgumentException; | ||
import org.apache.hadoop.hive.ql.metadata.HiveException; | ||
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; | ||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; | ||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; | ||
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; | ||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; | ||
import org.apache.hadoop.io.BytesWritable; | ||
import org.apache.hadoop.io.Text; | ||
import org.apache.log4j.Logger; | ||
|
||
import com.esri.core.geometry.ogc.OGCGeometry; | ||
|
||
public class HiveGeometryOIHelper { | ||
|
||
static Logger LOG = Logger.getLogger(HiveGeometryOIHelper.class); | ||
|
||
private PrimitiveObjectInspector oi; | ||
private int argIndex; | ||
private boolean isConstant; | ||
|
||
OGCGeometry constantGeometry; | ||
|
||
private HiveGeometryOIHelper(ObjectInspector oi, int argIndex) { | ||
this.oi = (PrimitiveObjectInspector)oi; | ||
this.argIndex = argIndex; | ||
|
||
// constant geometries only need to be processed once and can | ||
// be optimized in certain operations | ||
isConstant = ObjectInspectorUtils.isConstantObjectInspector(oi); | ||
} | ||
|
||
public static HiveGeometryOIHelper create(ObjectInspector oi, int argIndex) throws UDFArgumentException { | ||
if (oi.getCategory() != Category.PRIMITIVE) { | ||
throw new UDFArgumentException("Only primitive types current supported"); | ||
} | ||
|
||
return new HiveGeometryOIHelper(oi, argIndex); | ||
} | ||
|
||
/** | ||
* Gets whether this geometry argument is constant. | ||
* | ||
* @return | ||
*/ | ||
public boolean isConstant() { | ||
return isConstant; | ||
} | ||
|
||
/** | ||
* Returns the cached constant geometry object. | ||
* | ||
* @return cache geometry, or null if not constant | ||
*/ | ||
public OGCGeometry getConstantGeometry() { | ||
return constantGeometry; | ||
} | ||
|
||
/** | ||
* Reads the corresponding geometry from the deferred object list | ||
* or returns the cached geometry if argument is constant. | ||
* | ||
* @param args | ||
* @return | ||
*/ | ||
public OGCGeometry getGeometry(DeferredObject[] args) { | ||
if (isConstant) { | ||
if (constantGeometry == null) { | ||
constantGeometry = getGeometry(args[argIndex]); | ||
} | ||
|
||
return constantGeometry; | ||
} else { | ||
// not constant, so we have to rebuild the geometry | ||
// on every call | ||
return getGeometry(args[argIndex]); | ||
} | ||
} | ||
|
||
private OGCGeometry getGeometry(DeferredObject arg) { | ||
Object writable; | ||
try { | ||
writable = oi.getPrimitiveWritableObject(arg.get()); | ||
} catch (HiveException e) { | ||
LOG.error("Failed to get writable", e); | ||
return null; | ||
} | ||
|
||
if (writable == null) { | ||
return null; | ||
} | ||
|
||
switch (oi.getPrimitiveCategory()) { | ||
case BINARY: return GeometryUtils.geometryFromEsriShape((BytesWritable)writable); | ||
case STRING: return OGCGeometry.fromText(((Text)writable).toString()); | ||
default: return null; | ||
} | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
StringBuilder builder = new StringBuilder(); | ||
|
||
builder.append("HiveGeometryHelper("); | ||
builder.append("constant=" + isConstant + ";"); | ||
builder.append(")"); | ||
|
||
return builder.toString(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,29 @@ | ||
package com.esri.hadoop.hive; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
import org.apache.hadoop.hive.ql.exec.Description; | ||
import org.apache.hadoop.io.BooleanWritable; | ||
import org.apache.hadoop.io.BytesWritable; | ||
import org.apache.hadoop.hive.ql.udf.UDFType; | ||
|
||
import com.esri.core.geometry.ogc.OGCGeometry; | ||
import com.esri.core.geometry.OperatorContains; | ||
import com.esri.core.geometry.OperatorSimpleRelation; | ||
|
||
@UDFType(deterministic = true) | ||
@Description( | ||
name = "ST_Contains", | ||
value = "_FUNC_(geometry1, geometry2) - return true if geometry1 contains geometry2", | ||
extended = "Example:\n" + | ||
"SELECT _FUNC_(st_polygon(1,1, 1,4, 4,4, 4,1), st_point(2, 3) from src LIMIT 1; -- return true\n" + | ||
"SELECT _FUNC_(st_polygon(1,1, 1,4, 4,4, 4,1), st_point(8, 8) from src LIMIT 1; -- return false" | ||
) | ||
|
||
name = "ST_Contains", | ||
value = "_FUNC_(geometry1, geometry2) - return true if geometry1 contains geometry2", | ||
extended = "Example:\n" + | ||
"SELECT _FUNC_(st_polygon(1,1, 1,4, 4,4, 4,1), st_point(2, 3) from src LIMIT 1; -- return true\n" + | ||
"SELECT _FUNC_(st_polygon(1,1, 1,4, 4,4, 4,1), st_point(8, 8) from src LIMIT 1; -- return false" | ||
) | ||
public class ST_Contains extends ST_GeometryRelational { | ||
|
||
static final Log LOG = LogFactory.getLog(ST_Contains.class.getName()); | ||
final BooleanWritable resultBoolean = new BooleanWritable(); | ||
|
||
public BooleanWritable evaluate(BytesWritable geometryref1, BytesWritable geometryref2) | ||
{ | ||
if (geometryref1 == null || geometryref2 == null || | ||
geometryref1.getLength() == 0 || geometryref2.getLength() == 0) { | ||
LogUtils.Log_ArgumentsNull(LOG); | ||
return null; | ||
} | ||
|
||
if (!GeometryUtils.compareSpatialReferences(geometryref1, geometryref2)) { | ||
LogUtils.Log_SRIDMismatch(LOG, geometryref1, geometryref2); | ||
return null; | ||
} | ||
|
||
OGCGeometry ogcGeom1 = GeometryUtils.geometryFromEsriShape(geometryref1); | ||
OGCGeometry ogcGeom2 = GeometryUtils.geometryFromEsriShape(geometryref2); | ||
if (ogcGeom1 == null || ogcGeom2 == null){ | ||
LogUtils.Log_ArgumentsNull(LOG); | ||
return null; | ||
} | ||
|
||
resultBoolean.set(ogcGeom1.contains(ogcGeom2)); | ||
return resultBoolean; | ||
@Override | ||
protected OperatorSimpleRelation getRelationOperator() { | ||
return OperatorContains.local(); | ||
} | ||
|
||
@Override | ||
public String getDisplayString(String[] args) { | ||
return String.format("returns true if %s contains %s", args[0], args[1]); | ||
} | ||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.