# BerlinMOD Queries

So far we have replicated the BerlinMOD PyMEOS tutorial using PySpark. Now we will execute a subset of the BerlinMOD queries.

In [1]:
cd "../mobilitydb-berlinmod-sf0.1/"

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/data/mobilitydb-berlinmod-sf0.1


In [2]:
ls -lh

total 2.9G
-rw-r--r-- 1 root root 4.0M May 31 09:09 Archivo.zip
-rw-r--r-- 1 root root  86K May 30 17:15 brussels_region.csv
-rwxr-xr-x 1 root root 3.3K May  3 17:17 [0m[01;32minstants.csv[0m*
-rwxr-xr-x 1 root root 1.6K May  6 08:42 [01;32mlicences.csv[0m*
-rwxr-xr-x 1 root root  13K May  3 17:18 [01;32mperiods.csv[0m*
-rwxr-xr-x 1 root root 8.9K May  3 17:20 [01;32mpoints.csv[0m*
-rwxr-xr-x 1 root root 148K May  3 17:20 [01;32mregions.csv[0m*
drwxr-xr-x 3 root root   96 Jul 12 12:16 [01;34mspark-warehouse[0m/
-rwxr-xr-x 1 root root 2.8G May  3 17:23 [01;32mtrips.csv[0m*
-rw-r--r-- 1 root root 139K Jun  2 08:30 trips_sample_pymeos.csv
-rw-r--r-- 1 root root  14M May 28 09:19 trips_small.csv
-rw-r--r-- 1 root root   11 May 28 09:11 vehicle_ids.txt
-rwxr-xr-x 1 root root  20K May  3 17:21 [01;32mvehicles.csv[0m*
-rw-r--r-- 1 root root  128 May 28 09:09 vehicles_small.csv


In [3]:
rm -R spark-warehouse/

## Imports

In [4]:
from pymeos import *
from pymeos.plotters import *

from pyspark.sql import SparkSession
from pyspark.sql.types import *
import pyspark.sql.functions as F

from pysparkmeos.UDT.MeosDatatype import *

from pysparkmeos.partitions.grid.grid_partitioner import GridPartition
from pysparkmeos.partitions.kdtree_partitioner import KDTreePartition
from pysparkmeos.partitions.adaptive_partitioner_spark import AdaptiveBinsPartitionerSpark
from pysparkmeos.partitions.approx_adaptive_partitioner import ApproximateAdaptiveBinsPartitioner

from pysparkmeos.utils.udt_appender import *
from pysparkmeos.utils.utils import *

from pysparkmeos.UDF.udf import *
from pysparkmeos.UDTF.BerlinMOD import *

from pysparkmeos.BerlinMOD.config import load_config
from pysparkmeos.BerlinMOD.queries import *
from pysparkmeos.BerlinMOD.transformation_queries import *
from pysparkmeos.BerlinMOD.partition_queries import *
from pysparkmeos.BerlinMOD.func import *

import random, datetime, os, sys
from datetime import timedelta
from functools import partial
from datetime import datetime, timezone
import contextily as cx
import distinctipy
import geopandas as gpd
import pandas as pd
import shapely.geometry as shp

import matplotlib.pyplot as plt
import numpy as np
from shapely import wkb, box, from_wkb
from typing import Union
from time import time

## Spark Initialization

In [5]:
def startspark(app_name="BerlinMOD with PySpark", master="local[3]", parallelism=12):
    """Initialise PyMEOS and return the Spark session used by this notebook.

    Args:
        app_name: Application name given to the Spark session builder.
        master: Spark master URL (the default runs locally with 3 threads).
        parallelism: Value written to ``spark.default.parallelism``.

    Returns:
        The SparkSession, with the PyMEOS UDT mapping appended and the
        project's UDFs and UDTFs registered for use from Spark SQL.

    Note:
        The session is obtained with ``getOrCreate()``; if a session already
        exists in this kernel it is reused, so restart the kernel to be sure
        changed settings take effect.
    """
    # Initialize PyMEOS
    pymeos_initialize("UTC")

    # Make the driver and the Python workers use the interpreter running this kernel.
    os.environ['PYSPARK_DRIVER_PYTHON_OPTS'] = "notebook"
    os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable
    os.environ['PYSPARK_PYTHON'] = sys.executable

    # Initialize a Spark session
    spark = (
        SparkSession.builder
        .appName(app_name)
        .master(master)
        .config("spark.default.parallelism", parallelism)
        .config("spark.executor.memory", "3g")
        .config("spark.executor.cores", 1)
        .config("spark.driver.memory", "2g")
        .config("spark.driver.maxResultSize", 0)
        .config("spark.sql.execution.arrow.maxRecordsPerBatch", "500")
        .config("spark.sql.allowMultipleTableArguments.enabled", True)
        .getOrCreate()
    )

    # Append the UDT mapping to the PyMEOS classes
    udt_append()

    # Report the parallelism the session actually ended up with.
    default_parallelism = spark.sparkContext.getConf().get("spark.default.parallelism")
    print(f"spark.default.parallelism: {default_parallelism}")

    # Register udfs in Spark SQL
    register_udfs_under_spark_sql(spark)

    # Register the udtfs in Spark SQL
    register_udtfs_under_spark_sql(spark)

    return spark


spark = startspark()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/07/12 12:19:11 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/07/12 12:19:13 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


spark.default.parallelism: 12


24/07/12 12:19:18 WARN SimpleFunctionRegistry: The function length replaced a previously registered function.
24/07/12 12:19:18 WARN SimpleFunctionRegistry: The function nearest_approach_distance replaced a previously registered function.


## Load Tables
We will use the power of Spark SQL to read in the raw dataframes and then create the tables.

In [6]:
ls

Archivo.zip          [0m[01;32mperiods.csv[0m*  trips_sample_pymeos.csv  vehicles_small.csv
brussels_region.csv  [01;32mpoints.csv[0m*   trips_small.csv
[01;32minstants.csv[0m*        [01;32mregions.csv[0m*  vehicle_ids.txt
[01;32mlicences.csv[0m*        [01;32mtrips.csv[0m*    [01;32mvehicles.csv[0m*


## Experiments

Select which experiment to run in this notebook by setting `run_exp_number` in the cell below.  
Available experiments:
1. Run Queries AS-IS (default PySpark partitioning).
2. Run Queries with Trips partitioned by vehid, using Hash Partitioning.
3. Run Queries with Trips partitioned by trip, using RegularGrid.
4. Run Queries with Trips partitioned by trip, using KDTreePartitioning.
5. Run Queries with Trips partitioned by trip, using AdaptiveBinsPartitioning with Spark background.
6. Run Queries with Trips partitioned by trip, using ApproximateAdaptiveBinsPartitioning.

In [7]:
# Experiment to run: one of the numbers 1-6 from the "Available experiments" list.
# It is used as the key into `experiment_configs` further down.
run_exp_number = 3

# Query numbers to execute. Numbers that are not keys of the `queries` dict
# built further down are silently skipped.
# Full set of available queries:
#querynumbers = [1, 2, 3, 4, 5, 6, 11, 12, 13, 15, 18, 20]
querynumbers = [3]

### Set up the configurations for the experiment

In [8]:
# CSV file for each logical table (relative paths; the notebook changes into the
# dataset directory in its first cell). Trips and vehicles point at the *_small samples.
paths = {
    'trips': 'trips_small.csv',
    'instants': 'instants.csv',
    'licences': 'licences.csv',
    'periods': 'periods.csv',
    'points': 'points.csv',
    'regions': 'regions.csv',
    'vehicles': 'vehicles_small.csv'
}

# Per-table transformation queries passed to the un-tiled experiments (1 and 2).
# 'licences' and 'vehicles' have no entry in either transformation map.
transformation_queries_simple = {
    'trips': transtripssimple,
    'instants': transinstantssimple,
    'periods': transperiodsimple,
    'points': transpointssimple,
    'regions': transregionssimple
}

# Per-table transformation queries passed to the partitioner-based experiments (3-6).
transformation_queries = {
    'trips': transtrips,
    'instants': transinstants,
    'periods': transperiod,
    'points': transpoints,
    'regions': transregions
}

# Partitioning query; only the trips table has one.
partition_queries = {
    'trips': parttrips
}

# Column the trips table is partitioned on in experiments 3-6.
partition_keys = {
    'trips': 'tileid'
}

# Shared load options handed to load_config.
# NOTE(review): inferSchema/header are presumably forwarded to the CSV reader - confirm in load_config.
num_buckets = 8
inferSchema = True
header = True

In [9]:
# Experiment 1: run the queries as-is (default PySpark partitioning).
# No partition queries, keys, partitioner or bucket count are supplied.
configs_exp1 = load_config(
    spark=spark, 
    paths=paths, 
    trans_queries=transformation_queries_simple, 
    part_queries=None, 
    partition_keys=None,
    partitioner_class=None,
    partitioner_args=None,
    num_buckets = None,
    inferSchema = inferSchema,
    header=header
)

In [10]:
# Experiment 2: trips partitioned by vehid (hash partitioning).
# Only a partition key and a bucket count are given; no partitioner class is used.
configs_exp2 = load_config(
    spark=spark, 
    paths=paths, 
    trans_queries=transformation_queries_simple, 
    part_queries=None, 
    partition_keys={'trips': 'vehid'},
    partitioner_class=None,
    partitioner_args=None,
    num_buckets = num_buckets,
    inferSchema = inferSchema,
    header=header
)

In [11]:
# Experiment 3: trips partitioned by trip using a regular grid (GridPartition)
# configured with cells_per_side=8.
configs_exp3 = load_config(
    spark=spark, 
    paths=paths, 
    trans_queries=transformation_queries, 
    part_queries=partition_queries, 
    partition_keys=partition_keys,
    partitioner_class=GridPartition,
    partitioner_args={'cells_per_side': 8},
    num_buckets = num_buckets,
    inferSchema = inferSchema,
    header=header
)

In [12]:
# Experiment 4: trips partitioned by trip using KDTreePartition over x, y and t.
# NOTE(review): 'moving_objects' is None here - presumably supplied later when the
# tables are loaded; confirm in load_config / load_all_tables.
configs_exp4 = load_config(
    spark=spark, 
    paths=paths, 
    trans_queries=transformation_queries, 
    part_queries=partition_queries, 
    partition_keys=partition_keys,
    partitioner_class=KDTreePartition,
    partitioner_args={
        'moving_objects': None, 
        'dimensions': ['x', 'y', 't'], 
        'max_depth': 11},
    num_buckets = num_buckets,
    inferSchema = inferSchema,
    header=header
)

In [13]:
# Experiment 5: trips partitioned by trip using AdaptiveBinsPartitionerSpark.
# The partitioner is pointed at the 'trip' column of 'tripsRaw' and asked for 8 tiles
# over x, y and t.
configs_exp5 = load_config(
    spark=spark, 
    paths=paths, 
    trans_queries=transformation_queries, 
    part_queries=partition_queries, 
    partition_keys=partition_keys,
    partitioner_class=AdaptiveBinsPartitionerSpark,
    partitioner_args={
        'spark': spark, 
        'dfname': 'tripsRaw', 
        'colname': 'trip',
        'num_tiles': 8, 
        'dimensions': ['x', 'y', 't'], 
        'utc': "UTC"},
    num_buckets = num_buckets,
    inferSchema = inferSchema,
    header=header
)

In [14]:
# Experiment 6: trips partitioned by trip using ApproximateAdaptiveBinsPartitioner.
# NOTE(review): 'df' is None while 'tablename' names 'tripsRaw' - presumably the
# partitioner resolves the DataFrame from the table name; confirm.
configs_exp6 = load_config(
    spark=spark, 
    paths=paths, 
    trans_queries=transformation_queries, 
    part_queries=partition_queries, 
    partition_keys=partition_keys,
    partitioner_class=ApproximateAdaptiveBinsPartitioner,
    partitioner_args={
        'spark': spark,
        'df': None, 
        'colname': 'trip',
        'num_tiles': 8, 
        'dimensions': ['x', 'y', 't'], 
        'utc': "UTC",
        'tablename': "tripsRaw"},
    num_buckets = num_buckets,
    inferSchema = inferSchema,
    header=header
)

In [15]:
# Experiment number (1-based, same order as the "Available experiments" list)
# mapped to its configuration.
experiment_configs = dict(
    enumerate(
        [
            configs_exp1,
            configs_exp2,
            configs_exp3,
            configs_exp4,
            configs_exp5,
            configs_exp6,
        ],
        start=1,
    )
)
# Configuration of the experiment selected through run_exp_number.
config = experiment_configs[run_exp_number]

# BerlinMOD query number -> SQL text.
queries = {
    1: querytext1, 2: querytext2, 3: querytext3,
    4: querytext4, 5: querytext5, 6: querytext6,
    11: querytext11, 12: querytext12, 13: querytext13,
    15: querytext15, 18: querytext18, 20: querytext20,
}

# BerlinMOD query number -> human-readable description.
descriptions = {
    1: querydesc1, 2: querydesc2, 3: querydesc3,
    4: querydesc4, 5: querydesc5, 6: querydesc6,
    11: querydesc11, 12: querydesc12, 13: querydesc13,
    15: querydesc15, 18: querydesc18, 20: querydesc20,
}

# Keep only the requested numbers that are known, preserving the requested order.
queries_to_run = [
    queries[number]
    for number in querynumbers
    if number in queries
]
descriptions_to_run = [
    descriptions[number]
    for number in querynumbers
    if number in descriptions
]

### Run the experiment

#### Create Tables

In [16]:
# Load every table described by the selected experiment config. The call prints
# schemas, samples and creation times as it goes.
# NOTE(review): `stats` presumably holds the per-table load timings - confirm in load_all_tables.
tables, stats = load_all_tables(spark, config)

Reading raw csv  trips_small.csv


                                                                                

Creating temp view of raw table
+------+-----+----------+-----+----------+----------+--------------------+--------------------+-------+
|tripid|vehid|       day|seqno|sourcenode|targetnode|                trip|          trajectory|licence|
+------+-----+----------+-----+----------+----------+--------------------+--------------------+-------+
|     1|    1|2020-06-01|    1|     79113|     66276|[0101000020110F00...|0102000020110F000...|   NULL|
+------+-----+----------+-----+----------+----------+--------------------+--------------------+-------+

Schema and statistics of raw table
root
 |-- tripid: integer (nullable = true)
 |-- vehid: integer (nullable = true)
 |-- day: date (nullable = true)
 |-- seqno: integer (nullable = true)
 |-- sourcenode: integer (nullable = true)
 |-- targetnode: integer (nullable = true)
 |-- trip: string (nullable = true)
 |-- trajectory: string (nullable = true)
 |-- licence: string (nullable = true)



24/07/12 12:19:37 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

+-------+------------------+-----------------+------------------+------------------+------------------+--------------------+--------------------+-------+
|summary|            tripid|            vehid|             seqno|        sourcenode|        targetnode|                trip|          trajectory|licence|
+-------+------------------+-----------------+------------------+------------------+------------------+--------------------+--------------------+-------+
|  count|               124|              124|               124|               124|               124|                 124|                 124|      0|
|   mean| 405.9032258064516|14.42741935483871| 2.943548387096774|          36600.25|          36600.25|                NULL|                NULL|   NULL|
| stddev|243.83028948596683|8.369683374548373|2.3798012465853966|26819.557657976246|26819.557657976246|                NULL|                NULL|   NULL|
|    min|                 1|                1|                 1|           

                                                                                

+------+-----+----------+-----+----------+----------+--------------------+--------------------+-------+
|tripid|vehid|       day|seqno|sourcenode|targetnode|                trip|          trajectory|licence|
+------+-----+----------+-----+----------+----------+--------------------+--------------------+-------+
|     1|    1|2020-06-01|    1|     79113|     66276|[POINT(496253.840...|LINESTRING (49625...|   NULL|
|     2|    1|2020-06-01|    2|     66276|     79113|[POINT(481241.171...|LINESTRING (48124...|   NULL|
|     3|    1|2020-06-02|    1|     79113|     66276|[POINT(496253.840...|LINESTRING (49625...|   NULL|
|     4|    1|2020-06-02|    2|     66276|     79113|[POINT(481241.171...|LINESTRING (48124...|   NULL|
|     5|    1|2020-06-03|    1|     79113|     66276|[POINT(496253.840...|LINESTRING (49625...|   NULL|
+------+-----+----------+-----+----------+----------+--------------------+--------------------+-------+



                                                                                

Bounds:  STBOX XT(((473277.05262936745,6579811.389156611),(498784.34433982597,6606871.682578203)),[2020-06-01 06:01:41.054+00, 2020-06-11 19:49:59.511256+00])
Time to create partitioning grid:  0.2826817035675049  seconds.
+------+--------------------+
|tileid|                tile|
+------+--------------------+
|     0|STBOX XT(((473277...|
|     1|STBOX XT(((473277...|
|     2|STBOX XT(((473277...|
|     3|STBOX XT(((473277...|
|     4|STBOX XT(((473277...|
|     5|STBOX XT(((473277...|
|     6|STBOX XT(((473277...|
|     7|STBOX XT(((473277...|
|     8|STBOX XT(((473277...|
|     9|STBOX XT(((473277...|
|    10|STBOX XT(((473277...|
|    11|STBOX XT(((473277...|
|    12|STBOX XT(((473277...|
|    13|STBOX XT(((473277...|
|    14|STBOX XT(((473277...|
|    15|STBOX XT(((473277...|
|    16|STBOX XT(((473277...|
|    17|STBOX XT(((473277...|
|    18|STBOX XT(((473277...|
|    19|STBOX XT(((473277...|
+------+--------------------+
only showing top 20 rows

Creating partitioned table... 


                                                                                

Final table created in 18.91962742805481 seconds


                                                                                

Final table trips schema:
root
 |-- vehid: integer (nullable = true)
 |-- day: integer (nullable = true)
 |-- seqno: integer (nullable = true)
 |-- sourcenode: integer (nullable = true)
 |-- targetnode: string (nullable = true)
 |-- trajectory: pythonuserdefined (nullable = true)
 |-- license: string (nullable = true)
 |-- movingobjectid: string (nullable = true)
 |-- tileid: integer (nullable = true)
 |-- movingobject: pythonuserdefined (nullable = true)

Reading raw csv  instants.csv


                                                                                

Creating temp view of raw table
+---------+--------------------+
|instantid|             instant|
+---------+--------------------+
|        1|2020-06-01 19:44:...|
+---------+--------------------+

Schema and statistics of raw table
root
 |-- instantid: integer (nullable = true)
 |-- instant: timestamp (nullable = true)

+-------+------------------+
|summary|         instantid|
+-------+------------------+
|  count|               100|
|   mean|              50.5|
| stddev|29.011491975882016|
|    min|                 1|
|    max|               100|
+-------+------------------+

Creating final table instants based on instantsRawNoCache, partitioned by None.


                                                                                

+---------+------+--------------------+
|instantid|tileid|             instant|
+---------+------+--------------------+
|        1|     0|t@2020-06-01 19:4...|
|        1|     8|t@2020-06-01 19:4...|
|        1|    16|t@2020-06-01 19:4...|
|        1|    24|t@2020-06-01 19:4...|
|        1|    32|t@2020-06-01 19:4...|
+---------+------+--------------------+



                                                                                

Final table created in 1.6655898094177246 seconds
Final table instants schema:
root
 |-- instantid: integer (nullable = true)
 |-- tileid: integer (nullable = true)
 |-- instant: pythonuserdefined (nullable = true)

Reading raw csv  licences.csv
Creating temp view of raw table
+---------+--------+-----+
|licenceid| licence|vehid|
+---------+--------+-----+
|        1|B-QS 276|  276|
+---------+--------+-----+

Schema and statistics of raw table
root
 |-- licenceid: integer (nullable = true)
 |-- licence: string (nullable = true)
 |-- vehid: integer (nullable = true)



                                                                                

+-------+------------------+--------+------------------+
|summary|         licenceid| licence|             vehid|
+-------+------------------+--------+------------------+
|  count|               101|     101|               101|
|   mean|              51.0|    NULL|319.46534653465346|
| stddev|29.300170647967224|    NULL| 175.0106604956644|
|    min|                 1|B-BJ 115|                 9|
|    max|               101|B-[U 177|               622|
+-------+------------------+--------+------------------+

Creating final table licences based on licencesRawNoCache, partitioned by None.
Final table created in 1.2520778179168701 seconds
Final table licences schema:
root
 |-- licenceid: integer (nullable = true)
 |-- licence: string (nullable = true)
 |-- vehid: integer (nullable = true)

Reading raw csv  periods.csv
Creating temp view of raw table
+--------+--------------------+--------------------+--------------------+
|periodid|              beginp|                endp|              p

                                                                                

+--------------------+--------------------+--------+------+--------------------+
|              beginp|                endp|periodid|tileid|              period|
+--------------------+--------------------+--------+------+--------------------+
|2020-06-09 11:15:...|2020-06-09 20:38:...|       1|     6|[2020-06-09 11:15...|
|2020-06-09 11:15:...|2020-06-09 20:38:...|       1|    14|[2020-06-09 11:15...|
|2020-06-09 11:15:...|2020-06-09 20:38:...|       1|    22|[2020-06-09 11:15...|
|2020-06-09 11:15:...|2020-06-09 20:38:...|       1|    30|[2020-06-09 11:15...|
|2020-06-09 11:15:...|2020-06-09 20:38:...|       1|    38|[2020-06-09 11:15...|
+--------------------+--------------------+--------+------+--------------------+



                                                                                

Final table created in 1.4912188053131104 seconds
Final table periods schema:
root
 |-- beginp: timestamp (nullable = true)
 |-- endp: timestamp (nullable = true)
 |-- periodid: integer (nullable = true)
 |-- tileid: integer (nullable = true)
 |-- period: pythonuserdefined (nullable = true)

Reading raw csv  points.csv
Creating temp view of raw table
+-------+-----------------+-----------------+--------------------+
|pointid|             posx|             posy|                geom|
+-------+-----------------+-----------------+--------------------+
|      1|476191.0852037612|6589454.831155596|0101000020110F000...|
+-------+-----------------+-----------------+--------------------+

Schema and statistics of raw table
root
 |-- pointid: integer (nullable = true)
 |-- posx: double (nullable = true)
 |-- posy: double (nullable = true)
 |-- geom: string (nullable = true)



                                                                                

+-------+------------------+-----------------+-----------------+--------------------+
|summary|           pointid|             posx|             posy|                geom|
+-------+------------------+-----------------+-----------------+--------------------+
|  count|               100|              100|              100|                 100|
|   mean|              50.5|486384.3413598945|6594038.933758076|                NULL|
| stddev|29.011491975882016|7200.526060474747|6552.156274876073|                NULL|
|    min|                 1|472428.0634008836|6577421.541139536|0101000020110F000...|
|    max|               100| 498913.875699313|6607119.513588189|0101000020110F000...|
+-------+------------------+-----------------+-----------------+--------------------+

Creating final table points based on pointsRawNoCache, partitioned by None.


                                                                                

+-----------------+-----------------+-------+------+--------------------+
|             posx|             posy|pointid|tileid|                geom|
+-----------------+-----------------+-------+------+--------------------+
|476191.0852037612|6589454.831155596|      1|    16|POINT (476191.085...|
|476191.0852037612|6589454.831155596|      1|    17|POINT (476191.085...|
|476191.0852037612|6589454.831155596|      1|    18|POINT (476191.085...|
|476191.0852037612|6589454.831155596|      1|    19|POINT (476191.085...|
|476191.0852037612|6589454.831155596|      1|    20|POINT (476191.085...|
+-----------------+-----------------+-------+------+--------------------+



                                                                                

Final table created in 1.9005284309387207 seconds
Final table points schema:
root
 |-- posx: double (nullable = true)
 |-- posy: double (nullable = true)
 |-- pointid: integer (nullable = true)
 |-- tileid: integer (nullable = true)
 |-- geom: pythonuserdefined (nullable = true)

Reading raw csv  regions.csv
Creating temp view of raw table
+--------+--------------------+
|regionid|                geom|
+--------+--------------------+
|       1|0103000020110F000...|
+--------+--------------------+

Schema and statistics of raw table
root
 |-- regionid: integer (nullable = true)
 |-- geom: string (nullable = true)

+-------+------------------+--------------------+
|summary|          regionid|                geom|
+-------+------------------+--------------------+
|  count|               100|                 100|
|   mean|              50.5|                NULL|
| stddev|29.011491975882016|                NULL|
|    min|                 1|0103000020110F000...|
|    max|               100|0

                                                                                

+--------+------+--------------------+
|regionid|tileid|                geom|
+--------+------+--------------------+
|       1|   200|POLYGON ((483571....|
|       1|   201|POLYGON ((483571....|
|       1|   202|POLYGON ((483571....|
|       1|   203|POLYGON ((483571....|
|       1|   204|POLYGON ((483571....|
+--------+------+--------------------+



                                                                                

Final table created in 1.5360839366912842 seconds
Final table regions schema:
root
 |-- regionid: integer (nullable = true)
 |-- tileid: integer (nullable = true)
 |-- geom: pythonuserdefined (nullable = true)

Reading raw csv  vehicles_small.csv
Creating temp view of raw table
+-----+-------+---------+-----------+
|vehid|licence|     type|      model|
+-----+-------+---------+-----------+
|    1| B-EF 1|passenger|Sachsenring|
+-----+-------+---------+-----------+

Schema and statistics of raw table
root
 |-- vehid: integer (nullable = true)
 |-- licence: string (nullable = true)
 |-- type: string (nullable = true)
 |-- model: string (nullable = true)



                                                                                

+-------+----------------+-------+-----+--------+
|summary|           vehid|licence| type|   model|
+-------+----------------+-------+-----+--------+
|  count|               4|      4|    4|       4|
|   mean|           14.25|   NULL| NULL|    NULL|
| stddev|9.63932916061417|   NULL| NULL|    NULL|
|    min|               1|B-CJ 17|  bus|Multicar|
|    max|              24|B-PZ 15|truck|Wartburg|
+-------+----------------+-------+-----+--------+

Creating final table vehicles based on vehiclesRawNoCache, partitioned by None.
Final table created in 1.154970645904541 seconds
Final table vehicles schema:
root
 |-- vehid: integer (nullable = true)
 |-- licence: string (nullable = true)
 |-- type: string (nullable = true)
 |-- model: string (nullable = true)



#### Execute Queries

In [43]:
def f_ever_intersects(traj, other):
    """Return whether `traj` ever intersects `other`.

    Thin wrapper that delegates to ``traj.ever_intersects(other)``.

    Args:
        traj: Object exposing an ``ever_intersects`` method (e.g. a PyMEOS
            temporal point).
        other: The geometry or object to test against.

    Returns:
        Whatever ``traj.ever_intersects(other)`` returns.
    """
    return traj.ever_intersects(other)

@F.pandas_udf(returnType=BooleanType())
def pandas_ever_intersects(trajs: pd.Series, others: pd.Series) -> pd.Series:
    """Vectorised check of whether each trajectory ever intersects its paired point.

    Args:
        trajs: Serialized trajectories; each value is wrapped with
            ``TGeomPointSeqSetWrap`` before use.
        others: Point geometries as text of the form ``POINT (x y)``.

    Returns:
        Boolean Series, one value per input row, carrying the index of `trajs`.
    """
    # MEOS has to be initialised in the Python worker process as well; use the
    # same timezone as the driver-side initialisation in startspark().
    pymeos_initialize("UTC")

    def _parse_point(coord_text):
        # coord_text is "x y" once the WKT wrapper has been stripped.
        x_text, y_text = coord_text.split()[:2]
        return Point(float(x_text), float(y_text))

    trips = trajs.apply(TGeomPointSeqSetWrap)
    # regex=False: both patterns contain an unbalanced parenthesis and must be
    # matched literally whatever default the installed pandas version uses.
    coord_texts = (
        others.str.replace("POINT (", "", regex=False)
        .str.replace(")", "", regex=False)
    )
    points = coord_texts.apply(_parse_point)

    # Build the result directly: DataFrame.apply(axis=1) may hand back a
    # DataFrame instead of a Series when the batch is empty.
    hits = [trip.ever_intersects(point) for trip, point in zip(trips, points)]
    return pd.Series(hits, index=trajs.index, dtype=bool)

# Expose the pandas UDF to Spark SQL under the name referenced inside querytext4 below.
spark.udf.register("pandas_ever_intersects", pandas_ever_intersects)
# Disabled manual check of the UDF on the tile-joined tables:
#spark.sql("SELECT pandas_ever_intersects(t.movingobject, p.geom) FROM trips t INNER JOIN points p ON (t.tileid = p.tileid)").show()

# Override Query 4 with a variant that uses the pandas UDF. Trips and points are
# joined on tileid, so the UDF only runs on pairs that share a tile.
# NOTE(review): the `queries` / `descriptions` dicts were built in an earlier cell and
# still reference the previous querytext4 / querydesc4 objects, so run_all_queries
# will not see this override unless those dicts are rebuilt.
querydesc4 = "Query 4: Which licence plate numbers belong to vehicles that have passed the points from QueryPoints?"
querytext4 = """
    WITH vehids_intersect AS (
        SELECT t.vehid, pandas_ever_intersects(t.movingobject, p.geom)
        FROM trips t INNER JOIN points p ON (t.tileid=p.tileid)
        WHERE 
            pandas_ever_intersects(t.movingobject, p.geom) = TRUE
    )
    SELECT DISTINCT vi.vehid, v.licence
    FROM vehids_intersect vi INNER JOIN vehicles v ON (vi.vehid=v.vehid)
"""

# Run the overridden query directly, without printing the query plan.
query_exec(querytext4, querydesc4, spark, explain=False)

24/07/12 13:53:28 WARN SimpleFunctionRegistry: The function pandas_ever_intersects replaced a previously registered function.


Query 4: Which licence plate numbers belong to vehicles that have passed the points from QueryPoints?


                                                                                

+-----+-------+
|vehid|licence|
+-----+-------+
+-----+-------+

Query execution time:  5.410356760025024  seconds.


(DataFrame[vehid: int, licence: string],
 (1720792408.5752165, 1720792413.9855733, 5.410356760025024),
 None)

In [None]:
# Execute the selected queries (queries_to_run, with their matching descriptions)
# against the tables loaded for the chosen experiment.
# NOTE(review): qdfs_exp / stats_exp presumably hold the result DataFrames and timing
# statistics per query - confirm in run_all_queries.
qdfs_exp, stats_exp = run_all_queries(
    queries_to_run, 
    descriptions_to_run, 
    spark, 
    explain=True, 
    printplan=False
)

## Mapping the regions and trips

In [None]:
# Everything below is drawn onto one axes in order: region outline, basemap,
# partition grid, query regions and finally the trips.
fig, ax = plt.subplots(1, 1, figsize=(15, 15))

# Brussels region outline; the 'geom' column is hex-encoded WKB decoded while reading.
brussels = pd.read_csv(
    "brussels_region.csv", converters={"geom": partial(wkb.loads, hex=True)}
)
brussels = gpd.GeoDataFrame(brussels, geometry="geom")
# First geometry of the region file (not used further in this cell).
brussels_geom = brussels["geom"][0]
brussels.plot(ax=ax, alpha=0.3, color='black')
cx.add_basemap(ax, alpha=0.3)

# Outline of every tile in the 'grid' table.
# NOTE(review): assumes a 'grid' table was registered by the selected experiment - confirm for experiments 1 and 2.
grid = spark.table('grid')

for gridrow in grid.toLocalIterator():
    gridrow.tile.plot_xy(axes=ax, color="black", draw_filling=False)

# Query regions; distinct() collapses rows that repeat a region across tiles.
regions = spark.table('regions').select("regionid", "geom").distinct()

for regionrow in regions.toLocalIterator():
    myPoly = gpd.GeoSeries([regionrow.geom])
    myPoly.plot(ax=ax, alpha=0.6, color='lightgreen')
    
# Trips, coloured by movingobjectid modulo the palette size.
# Disabled alternative that plots only a 10% sample:
#trips = spark.table('trips').sample(0.1, seed=3).select('movingobjectid', 'movingobject')
trips = spark.table('trips').select('movingobjectid', 'movingobject')
colors = ['orange', 'red', 'yellow', 'blue', 'purple']
for triprow in trips.toLocalIterator():
    TemporalPointSequenceSetPlotter.plot_xy(
        triprow.movingobject, axes=ax, show_markers=True, show_grid=False, color=colors[int(triprow.movingobjectid) % len(colors)]
    )

# Disabled export of the figure to SVG:
#extent = ax.get_tightbbox(fig.canvas.get_renderer()).transformed(fig.dpi_scale_trans.inverted())
#fig.savefig(f'BerlinMODSampleplot.svg', bbox_inches=extent)  # Adjust expanded() parameters as needed

plt.title("BerlinMOD Sample Trajectories Plot")