In [19]:
import geomesa_pyspark

# Build the Spark configuration from the GeoMesa HBase runtime jar, the Python
# packages to ship, and the local Spark install, then name the application.
conf = geomesa_pyspark.configure(
    jars=['/usr/lib/spark/jars/geomesa-hbase-spark-runtime_2.11-2.1.0-m.2.jar'],
    packages=['geomesa_pyspark', 'pytz'],
    spark_home='/usr/lib/spark/'
).setAppName('MyTestApp')

# Only the LAST expression of a notebook cell is displayed; a bare
# `conf.get(...)` in the middle of the cell is evaluated and discarded.
# Print it so the configured master is actually visible.
print(conf.get('spark.master'))
# -> yarn

# NOTE(review): keep this import below configure(). geomesa_pyspark.configure()
# appears to be what makes `pyspark` importable from spark_home -- confirm
# before hoisting it to the top of the cell.
from pyspark.sql import SparkSession

# getOrCreate() returns the already-running session if there is one,
# otherwise it starts a new session using `conf`.
spark = SparkSession.builder.config(conf=conf).getOrCreate()

In [20]:
# Name of the GeoMesa feature type to read, and the data store it lives in.
feature = "adsbx"
params = {
    "hbase.catalog": "ons-historical",
    "hbase.zookeepers": "hbase.optix-ons-local:2181",
}

# Load the feature type through the "geomesa" data source as a DataFrame.
adsbx = spark.read.format("geomesa").options(**params).option("geomesa.feature", feature).load()

# Register the DataFrame under the name "adsbx" so SQL cells can query it.
adsbx.createOrReplaceTempView("adsbx")

In [None]:
# Number of distinct Icao values in the "adsbx" view whose geometry lies inside
# the buffer around the point (55.31, 25.26), with dtg strictly between
# 2018-10-13 and 2018-10-20.
# NOTE(review): the buffer distance 50000 is presumably in meters -- confirm
# against the GeoMesa st_bufferPoint documentation.
(
    spark
    .sql("""
select
 count(distinct Icao) as num_planes
from adsbx
where st_contains(st_bufferPoint(st_makePoint(55.31, 25.26), 50000), geom)
 and dtg > cast('2018-10-13' as timestamp)
 and dtg < cast('2018-10-20' as timestamp)
""")
    .show()
)


In [8]:
# Distinct Icao values per calendar day inside the buffer around the point
# (55.31, 25.26), for dtg strictly between 2018-10-13 and 2018-10-20.
#
# Notes on the query:
#  * The date pattern uses lowercase yyyy (calendar year). Uppercase YYYY is the
#    week-based year, which reports the wrong year for dates near 1 January.
#  * cast(dtg as date) drops the time of day so rows group by calendar date.
#  * The ORDER BY is on the outermost query; an ORDER BY inside a subquery does
#    not guarantee the order of the rows that are finally displayed. The
#    yyyy-MM-dd strings sort in chronological order.
spark.sql("""
select
  num_planes,
  date_format(dtg_day, 'yyyy-MM-dd') as day
from (
  select
    count(distinct Icao) as num_planes,
    /* Truncate the timestamp to its calendar date */
    cast(dtg as date) as dtg_day
  from adsbx
  where st_contains(st_bufferPoint(st_makePoint(55.31, 25.26), 50000), geom)
    and dtg > cast('2018-10-13' as timestamp)
    and dtg < cast('2018-10-20' as timestamp)
  group by
    cast(dtg as date)
)
order by
  day
""").show()

+----------+----------+
|num_planes|       day|
+----------+----------+
|       663|2018-10-13|
|       506|2018-10-14|
|       576|2018-10-15|
|       646|2018-10-16|
|       662|2018-10-17|
|       655|2018-10-18|
|       663|2018-10-19|
+----------+----------+

In [10]:
# Python - exactEarth

import geomesa_pyspark

# Spark configuration for the session: GeoMesa HBase runtime jar, Python
# packages to ship, local Spark install, and the application name.
conf = geomesa_pyspark.configure(
    jars=['/usr/lib/spark/jars/geomesa-hbase-spark-runtime_2.11-2.1.0-m.2.jar'],
    packages=['geomesa_pyspark', 'pytz'],
    spark_home='/usr/lib/spark/'
).setAppName('MyTestApp')

# A bare expression that is not the last line of a cell produces no output,
# so print the configured master explicitly.
print(conf.get('spark.master'))
# -> yarn

# NOTE(review): this import is intentionally placed after configure(), which
# appears to make `pyspark` importable from spark_home -- confirm before moving.
from pyspark.sql import SparkSession

# getOrCreate() hands back the active session when one already exists in this
# kernel; only otherwise is a new session built from `conf`.
spark = SparkSession.builder.config(conf=conf).getOrCreate()



In [13]:
# Feature type to read and the GeoMesa HBase data store that holds it.
feature = "ee"
params = {
    "hbase.catalog": "ons-historical",
    "hbase.zookeepers": "hbase.optix-ons-local:2181",
}

# Read the feature type through the "geomesa" data source into a DataFrame.
ee = spark.read.format("geomesa").options(**params).option("geomesa.feature", feature).load()

# Expose the DataFrame to SQL cells under the view name "ee".
ee.createOrReplaceTempView("ee")

In [12]:
# Recent position reports for the vessel with mmsi 366206000. The full result
# set can be very large, so only the first 10 rows are displayed here.
# NOTE(review): date_add() returns a DATE per the Spark SQL function reference,
# so the cutoff is presumably the start of the previous day rather than exactly
# 24 hours before now -- confirm this is the intended window.
(
    spark
    .sql("""
select
 dt_pos_utc, mmsi, vessel_name, longitude, latitude
FROM
 ee
WHERE
 mmsi = 366206000
 AND dt_pos_utc > date_add(current_timestamp(), -1)
""")
    .show(10)
)

+-------------------+---------+-----------+------------------+-----------------+
|         dt_pos_utc|     mmsi|vessel_name|         longitude|         latitude|
+-------------------+---------+-----------+------------------+-----------------+
|2020-09-04 00:02:37|366206000|CAPE EDMONT|            -79.94|            32.85|
|2020-09-04 00:04:45|366206000|CAPE EDMONT|-79.94040333333334|32.85048833333333|
|2020-09-04 00:05:37|366206000|CAPE EDMONT|            -79.94|            32.85|
|2020-09-04 00:05:51|366206000|CAPE EDMONT|            -79.94|            32.85|
|2020-09-04 00:08:37|366206000|CAPE EDMONT|            -79.94|            32.85|
|2020-09-04 00:11:37|366206000|CAPE EDMONT|            -79.94|            32.85|
|2020-09-04 00:14:36|366206000|CAPE EDMONT|            -79.94|            32.85|
|2020-09-04 00:14:54|366206000|CAPE EDMONT|            -79.94|            32.85|
|2020-09-04 00:16:48|366206000|CAPE EDMONT|-79.94044166666667|32.85050666666667|
|2020-09-04 00:17:37|3662060