In [0]:
from pyspark.sql import SparkSession
# Build (or reuse) the Spark session used by every cell in this notebook.
spark = (
    SparkSession.builder
    .master("yarn")
    .appName("pyspark-udf-li-sdk")
    # Presumably required so the SDK's Scala UDFs can be registered -- TODO confirm.
    .config("spark.sql.legacy.allowUntypedScalaUDF", True)
    .getOrCreate()
)

# Register the LI SDK PySpark archive with the SparkContext
# (presumably what makes the `li` package importable later -- confirm).
spark.sparkContext.addPyFile('/location_intelligence_bigdata_li_sdk_pyspark_5.2.1.zip')

In [0]:
# Registration helper shipped with the LI SDK.
from li.SQLRegistrator import SQLRegistrator

# Register the LI UDFs so the ST_* functions can be used in spark.sql(...) queries.
SQLRegistrator.registerAll()

# Input CSV; the queries in this notebook read its WKT, State_Name and State columns.
poly_file_path = "/STATES.csv"

# First row is the header; let Spark infer the column types.
df = spark.read.csv(
    poly_file_path,
    header=True,
    inferSchema=True,
)

# Expose the dataframe to Spark SQL under the name "polygontest".
df.createOrReplaceTempView("polygontest")

## **Constructor and Persistence Functions**

In [0]:
# Build a geometry directly from the WKT column.
geoFromWKT = spark.sql(
    "SELECT ST_GeomFromWKT(WKT) as Geometry, State_Name, State "
    "FROM polygontest"
)

# Round trip through GeoJSON: WKT -> geometry -> GeoJSON -> geometry.
GeoFromGeoJSON = spark.sql(
    "SELECT ST_GeomFromGeoJSON(ST_ToGeoJSON(ST_GeomFromWKT(WKT))) as Geometry, State_Name, State "
    "FROM polygontest"
)

# Round trip through WKB: WKT -> geometry -> WKB -> geometry.
GeoFromWKB = spark.sql(
    "SELECT ST_GeomFromWKB(ST_ToWKB(ST_GeomFromWKT(WKT))) as Geometry, State_Name, State "
    "FROM polygontest"
)

# Round trip through KML: WKT -> geometry -> KML -> geometry.
GeoFromKML = spark.sql(
    "SELECT ST_GeomFromKML(ST_ToKML(ST_GeomFromWKT(WKT))) as Geometry, State_Name, State "
    "FROM polygontest"
)

# Point geometry built from two literal coordinates (no table involved).
GeoFromPoint = spark.sql(
    "SELECT ST_Point(-73.750333 , 42.736103) "
    "as Geometry"
)


## **Predicate Functions**

In [0]:
# Split the input rows into two views so the binary predicates below have a
# left-hand (geometry1) and a right-hand (geometry2) set of geometries.
# The fixed seed makes the split reproducible: without it every run places
# different rows in each view, so the results of this cell and of the later
# cells that query geometry1/geometry2 would change from run to run.
df1, df2 = df.randomSplit([0.5, 0.5], seed=42)
df1.createOrReplaceTempView("geometry1")
df2.createOrReplaceTempView("geometry2")

# NOTE: "FROM geometry1 t1, geometry2 t2" has no join condition, so each
# two-table query below evaluates its predicate for every (t1, t2) row pair.

# Check Disjoint
disjoint = spark.sql("SELECT ST_Disjoint(ST_GeomFromWKT(t1.WKT), ST_GeomFromWKT(t2.WKT)) as IsDisjoint FROM geometry1 t1, geometry2 t2")

# Check Intersects
intersect = spark.sql("SELECT ST_Intersects(ST_GeomFromWKT(t1.WKT), ST_GeomFromWKT(t2.WKT)) as Intersects FROM geometry1 t1, geometry2 t2")

# Check Overlap
overlap = spark.sql("SELECT ST_Overlaps(ST_GeomFromWKT(t1.WKT), ST_GeomFromWKT(t2.WKT)) as Overlap FROM geometry1 t1, geometry2 t2")

# Check Within
within = spark.sql("SELECT ST_Within(ST_GeomFromWKT(t1.WKT), ST_GeomFromWKT(t2.WKT)) as Within FROM geometry1 t1, geometry2 t2")

# Check NullGeometry (single-table query over geometry1 only)
nullGeo = spark.sql("SELECT ST_IsNullGeom(ST_GeomFromWKT(t1.WKT)) as IsNullGeometry FROM geometry1 t1")


## **Measurement Functions**

In [0]:
# Area of each polygon in square miles; the first two rows are printed as a
# quick sanity check.
getAreaInMile = spark.sql("SELECT ST_Area(ST_GeomFromWKT(WKT), 'sq mi', 'SPHERICAL') as Calculated_Area_Mile FROM polygontest")
getAreaInMile.show(2)

# Same area query, reported in square kilometres.
getAreaInKM = spark.sql("SELECT ST_Area(ST_GeomFromWKT(WKT), 'sq km', 'SPHERICAL') as Calculated_Area_KM FROM polygontest")

# Distance in metres between every (geometry1, geometry2) row pair
# (the FROM clause has no join condition).
getDistance = spark.sql("SELECT ST_Distance(ST_GeomFromWKT(t1.WKT), ST_GeomFromWKT(t2.WKT), 'm', 'SPHERICAL') FROM geometry1 t1, geometry2 t2")

# Length in kilometres. The unit argument is 'km' so the values agree with the
# Calculated_Length_KM column name (it was previously 'm', which produced
# metres under a kilometre label).
getLengthInKM = spark.sql("SELECT ST_Length(ST_GeomFromWKT(WKT), 'km', 'CARTESIAN') as Calculated_Length_KM FROM polygontest")

# Perimeter in kilometres.
getPerimeter = spark.sql("SELECT ST_Perimeter(ST_GeomFromWKT(WKT), 'km', 'SPHERICAL') as Calculated_Perimeter_KM FROM polygontest")


## **Processing Functions**

In [0]:
# Buffer: called with the geometry, 5.0, 'km', 4 and 'SPHERICAL'
# (presumably width, unit, resolution and computation type -- confirm against the SDK docs).
getBuffer = spark.sql("SELECT ST_Buffer(ST_GeomFromWKT(WKT), 5.0 , 'km', 4, 'SPHERICAL') as Calculated_Buffer_KM FROM polygontest")

# Convex hull of each polygon.
convexhull = spark.sql("SELECT ST_ConvexHull(ST_GeomFromWKT(WKT)) as Convex_Hull FROM polygontest")

# Intersection of every (geometry1, geometry2) row pair
# (the FROM clause has no join condition).
intersection = spark.sql("SELECT ST_Intersection(ST_GeomFromWKT(t1.WKT), ST_GeomFromWKT(t2.WKT)) as Intersection FROM geometry1 t1, geometry2 t2")

# Transform each geometry to the 'epsg:3857' coordinate system.
tranform = spark.sql("SELECT ST_Transform(ST_GeomFromWKT(WKT), 'epsg:3857') as Tranform FROM polygontest")

# Union of every (geometry1, geometry2) row pair. This query previously called
# ST_Intersection (copy-paste of the query above), so the "Unions" column
# actually held intersections; it now calls ST_Union.
union = spark.sql("SELECT ST_Union(ST_GeomFromWKT(t1.WKT), ST_GeomFromWKT(t2.WKT)) as Unions FROM geometry1 t1, geometry2 t2")


## **Observer Functions**

In [0]:
# Each query below builds a literal point with ST_Point and passes it to one observer function.

# ST_X
ST_X = spark.sql(
    "SELECT ST_X(ST_Point(-73.750333 , 42.736103)) "
    "as X_Cordinate"
)

# ST_XMax
ST_XMax = spark.sql(
    "SELECT  ST_XMax(ST_Point(33.750333 , 42.736103)) "
    "as X_Max_Cordinate"
)

# ST_XMin
ST_XMin = spark.sql(
    "SELECT  ST_XMin(ST_Point(33.750333 , 42.736103)) "
    "as X_Min_Cordinate"
)

# ST_Y
ST_Y = spark.sql(
    "SELECT ST_Y(ST_Point(-73.750333 , 42.736103)) "
    "as Y_Cordinate"
)

# ST_YMax
ST_YMax = spark.sql(
    "SELECT  ST_YMax(ST_Point(33.750333 , 42.736103)) "
    "as Y_Max_Cordinate"
)

# ST_YMin
ST_YMin = spark.sql(
    "SELECT  ST_YMin(ST_Point(33.750333 , 42.736103)) "
    "as Y_Min_Cordinate"
)


## **Grid Functions**

In [0]:
# GeoHash id for a literal point (second argument is 3).
GeoHashId = spark.sql(
    "SELECT ST_GeoHash(ST_Point(-73.750333, 42.736103),3) "
    "as GeoHashId"
)

# Boundary for the GeoHash id 'dre'.
GeoHashBoundary = spark.sql(
    "SELECT ST_GeoHashBoundary('dre') "
    "as GeoHashBoundary"
)

# HexHash id for the same literal point.
HexHash = spark.sql(
    "SELECT ST_HexHash(ST_Point(-73.750333, 42.736103),3) "
    "as HexHash"
)

# Boundary for the HexHash id 'PF704'.
HexagonBoundary = spark.sql(
    "SELECT ST_HexHashBoundary('PF704') "
    "as HexagonBoundary"
)

# SquareHash id for the same literal point.
SquareHash = spark.sql(
    "SELECT ST_SquareHash(ST_Point(-73.750333, 42.736103),3) "
    "as SquareHash"
)

# Boundary for the SquareHash id '030'.
SquareHashBoundary = spark.sql(
    "SELECT ST_SquareHashBoundary('030') "
    "as SquareHashBoundary"
)
