# Microbenchmarks on GPU
This is a notebook for microbenchmarks running on GPU. 

In [None]:
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from time import time
import os
# Change to your cluster ip:port and directories
SPARK_MASTER_URL = os.getenv("SPARK_MASTER_URL", "spark:your-ip:port")
RAPIDS_JAR = os.getenv("RAPIDS_JAR", "/your-path/rapids-4-spark_2.12-25.08.0.jar")
DATA_ROOT = os.getenv("DATA_ROOT", "/data")

Run the example with retryTimes

In [None]:
def runMicroBenchmark(spark, appName, query, retryTimes):
    count = 0
    total_time = 0
    # You can print the physical plan of each query
    # spark.sql(query).explain()
    while count < retryTimes:
        start = time()
        spark.sql(query).show(5)
        end = time()
        total_time += round(end - start, 2)
        count = count + 1
        print("Retry times : {}, ".format(count) + appName + " microbenchmark takes {} seconds".format(round(end - start, 2)))
    print(appName + " microbenchmark takes average {} seconds after {} retries".format(round(total_time/retryTimes),retryTimes))
    with open('result.txt', 'a') as file:
        file.write("{},{},{}\n".format(appName, round(total_time/retryTimes), retryTimes))

In [26]:
# You need to update with your real hardware resource 
driverMem = os.getenv("DRIVER_MEM", "50g")
executorMem = os.getenv("EXECUTOR_MEM", "16g")
maxPartionBytes = os.getenv("MAX_PARTITION_BYTES", "1g")
pinnedPoolSize = os.getenv("PINNED_POOL_SIZE", "8g")
concurrentGpuTasks = os.getenv("CONCURRENT_GPU_TASKS", "3")
executorCores = int(os.getenv("EXECUTOR_CORES", "16"))
gpuPerTask = 1/executorCores
# Common spark settings
conf = SparkConf()
conf.setMaster(SPARK_MASTER_URL)
conf.setAppName("Delta examples")
conf.set("spark.driver.memory", driverMem)
## The tasks will run on GPU memory, so there is no need to set a high host memory
conf.set("spark.executor.memory", executorMem)
## The tasks will run on GPU cores, so there is no need to use many cpu cores
conf.set("spark.executor.cores", executorCores)
conf.set("spark.locality.wait", "0")
conf.set("spark.sql.files.maxPartitionBytes", maxPartionBytes) 
conf.set("spark.dynamicAllocation.enabled", "false") 
conf.set("spark.sql.adaptive.enabled", "true") 

# Delta settings
conf.set("spark.jars.packages", "io.delta:delta-spark_2.12:3.3.0")
conf.set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
conf.set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

# Plugin settings
conf.set("spark.executor.resource.gpu.amount", "0")
# 4 tasks will run concurrently per GPU
conf.set("spark.rapids.sql.concurrentGpuTasks", concurrentGpuTasks)
# Pinned 8g host memory to transfer data between GPU and host memory
conf.set("spark.rapids.memory.pinnedPool.size", pinnedPoolSize)
# 16 tasks will run concurrently per executor, as we set spark.executor.cores=16
conf.set("spark.task.resource.gpu.amount", "0") 
conf.set("spark.rapids.sql.enabled", "true") 
conf.set("spark.plugins", "com.nvidia.spark.SQLPlugin")
conf.set("spark.rapids.sql.variableFloatAgg.enabled", "true")
conf.set("spark.driver.extraClassPath", RAPIDS_JAR)
conf.set("spark.executor.extraClassPath", RAPIDS_JAR)
conf.set("spark.jars", RAPIDS_JAR)
# Create spark session
spark = SparkSession.builder.config(conf=conf).getOrCreate()
# Load dataframe and create tempView
spark.read.format("delta").load(DATA_ROOT + "/customer").createOrReplaceTempView("customer")
spark.read.format("delta").load(DATA_ROOT + "/store_sales").createOrReplaceTempView("store_sales")
spark.read.format("delta").load(DATA_ROOT + "/catalog_sales").createOrReplaceTempView("catalog_sales")
spark.read.format("delta").load(DATA_ROOT + "/web_sales").createOrReplaceTempView("web_sales")
spark.read.format("delta").load(DATA_ROOT + "/item").createOrReplaceTempView("item")
spark.read.format("delta").load(DATA_ROOT + "/date_dim").createOrReplaceTempView("date_dim")
print("-"*50)

25/09/29 17:10:42 WARN RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.
25/09/29 17:10:42 WARN RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.
25/09/29 17:10:42 WARN RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.
25/09/29 17:10:42 WARN GpuDeviceManager: spark.rapids.memory.host.offHeapLimit.size is not set; we used memory limit derived from (0.8 * (estimated available host memory / device count) - spark.executor.memory - spark.executor.pyspark.memory - spark.memory.offHeap.size) = (0.8 * (106798522368 / 1) - 17179869184 - 0 - 0) = 71694922547
25/09/29 17:10:42 WARN GpuDeviceManager: Off Heap Host Memory configured to be 8192.0 MiB pinned, 59941.60546875 MiB non-pinned, and 240.0 MiB of untracked overhead.
25/09/29 17:10:42 WARN GpuDeviceManager: The default cuDF host pool was alrea

--------------------------------------------------


### Update
This is a microbenchmark about the update running on the GPU. TBD.

In [None]:
spark.sql(f"CREATE TABLE delta.`{DATA_ROOT}/store_sales_update` SHALLOW CLONE delta.`{DATA_ROOT}/store_sales`")
sql = f"""
UPDATE delta.`{DATA_ROOT}/store_sales_update` SET 
ss_wholesale_cost = ss_wholesale_cost * 2,
ss_list_price = ss_list_price * 2,
ss_sales_price = ss_sales_price * 2,
ss_ext_sales_price = ss_ext_sales_price * 2,
ss_ext_wholesale_cost = ss_ext_wholesale_cost * 2,
ss_ext_list_price = ss_ext_list_price * 2,
ss_ext_discount_amt = ss_ext_discount_amt * 2
WHERE ss_store_sk <= 40
"""
print("-"*50)

25/09/29 17:13:13 WARN GpuOverrides: 
      ! <SerializeFromObjectExec> cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec
        ! <MapElementsExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec
          !Expression <AttributeReference> obj#4877 cannot run on GPU because expression AttributeReference obj#4877 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)
          ! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec
            ! <NewInstance> newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstanc

In [32]:
# Run microbenchmark with n retry time
runMicroBenchmark(spark,"update",sql,2)

25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of

+-----------------+
|num_affected_rows|
+-----------------+
|         11338477|
+-----------------+

Retry times : 1, update microbenchmark takes 32.98 seconds


25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of

+-----------------+
|num_affected_rows|
+-----------------+
|         11338477|
+-----------------+

Retry times : 2, update microbenchmark takes 29.63 seconds
update microbenchmark takes average 31 seconds after 2 retries


25/09/29 17:14:21 WARN GpuOverrides:                                            
! <LocalTableScanExec> cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec
  @Expression <AttributeReference> toprettystring(num_affected_rows)#13294 could run on GPU



### Delete
This is a microbenchmark about windowing expressions running on GPU mode. The sub-query calculates the average ss_sales_price of a fixed window function partition by ss_customer_sk, and the parent query calculates the average price of the sub-query grouping by each customer. You will see about 25x speedups in this query. The speedup mainly comes from GPUSort/GPUWindow/GPUHashAggregate. The avg aggregation function evaluates all rows which are generated by the sub-query's window function. There will be a more significant performance improvement along with the increasing number of sub-query aggregate functions.

In [33]:
spark.sql(f"CREATE TABLE delta.`{DATA_ROOT}/store_sales_delete` SHALLOW CLONE delta.`{DATA_ROOT}/store_sales`")
sql = f"""
DELETE FROM delta.`{DATA_ROOT}/store_sales_delete` WHERE ss_store_sk <= 40
"""
print("-"*50)

25/09/29 17:37:20 WARN GpuOverrides: 
      ! <SerializeFromObjectExec> cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec
        ! <MapElementsExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec
          !Expression <AttributeReference> obj#13403 cannot run on GPU because expression AttributeReference obj#13403 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)
          ! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec
            ! <NewInstance> newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInsta

--------------------------------------------------


In [None]:
# Run microbenchmark with n retry time
runMicroBenchmark(spark,"delete",sql,1)

25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of

+-----------------+
|num_affected_rows|
+-----------------+
|         11338477|
+-----------------+

Retry times : 1, delete microbenchmark takes 21.76 seconds


25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of

+-----------------+
|num_affected_rows|
+-----------------+
|                0|
+-----------------+

Retry times : 2, delete microbenchmark takes 0.99 seconds
delete microbenchmark takes average 11 seconds after 2 retries


25/09/29 17:37:58 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:37:58 WARN GpuOverrides: 
! <LocalTableScanExec> cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec
  @Expression <AttributeReference> toprettystring(num_affected_rows)#19037 could run on GPU



### Merge
Data skew is caused by many null values in the ss_customer_sk column. You will see about 80x speedups in this query. The heavier skew task a query has, the more improved performance we will get because GPU parallelizes the computation, CPU is limited to just a single core because of how the algorithms are written.

In [None]:
spark.sql(f"CREATE TABLE delta.`{DATA_ROOT}/store_sales_merge` SHALLOW CLONE delta.`{DATA_ROOT}/store_sales`")
sql = f"""
MERGE INTO delta.`{DATA_ROOT}/store_sales_merge` AS target
USING delta.`{DATA_ROOT}/store_sales` AS source
  ON target.ss_ticket_number = source.ss_ticket_number
    AND target.ss_item_sk = source.ss_item_sk
    AND (source.ss_ticket_number * source.ss_item_sk) % 1000 < 400
WHEN MATCHED THEN
  UPDATE SET target.ss_coupon_amt = source.ss_coupon_amt
"""
print("-"*50)

25/09/29 17:39:43 WARN GpuOverrides: 
      ! <SerializeFromObjectExec> cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec
        ! <MapElementsExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec
          !Expression <AttributeReference> obj#19146 cannot run on GPU because expression AttributeReference obj#19146 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)
          ! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec
            ! <NewInstance> newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInsta

--------------------------------------------------


In [36]:
# Run microbenchmark with n retry time
runMicroBenchmark(spark,"merge",sql,1)

25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of

+-----------------+----------------+----------------+-----------------+
|num_affected_rows|num_updated_rows|num_deleted_rows|num_inserted_rows|
+-----------------+----------------+----------------+-----------------+
|         11556035|        11556035|               0|                0|
+-----------------+----------------+----------------+-----------------+

Retry times : 1, merge microbenchmark takes 56.92 seconds
merge microbenchmark takes average 57 seconds after 1 retries


25/09/29 17:40:53 WARN GpuOverrides: 
! <LocalTableScanExec> cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec
  @Expression <AttributeReference> toprettystring(num_affected_rows)#27621 could run on GPU
  @Expression <AttributeReference> toprettystring(num_updated_rows)#27622 could run on GPU
  @Expression <AttributeReference> toprettystring(num_deleted_rows)#27623 could run on GPU
  @Expression <AttributeReference> toprettystring(num_inserted_rows)#27624 could run on GPU



### Bin packing
This is a microbenchmark about intersection operation running on GPU mode. The query calculates items in the same brand, class, and category that are sold in all three sales channels in two consecutive years. You will see about 10x speedups in this query. This is a competition between high cardinality SortMergeJoin vs GpuShuffleHashJoin. The mainly improved performance comes from two SortMergeJoin(s) in this query running on CPU get converted to GpuShuffleHashJoin running on GPU.

In [None]:
# Set maxRecordsPerFile to a small number to create many small files
spark.conf.set("spark.sql.files.maxRecordsPerFile", 10000)
spark.sql(f"CREATE TABLE delta.`{DATA_ROOT}/store_sales_bin_packing` USING delta PARTITIONED BY (ss_sold_date_sk) AS SELECT * FROM delta.`{DATA_ROOT}/store_sales`")
sql = f"""
OPTIMIZE delta.`{DATA_ROOT}/store_sales_bin_packing`
"""
spark.conf.unset("spark.sql.files.maxRecordsPerFile")
print("-"*50)

25/09/29 17:50:38 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:50:38 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric
25/09/29 17:50:38 WARN GpuSortExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric
25/09/29 17:50:38 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric
                                                                                

--------------------------------------------------


In [None]:
# Run microbenchmark with n retry time
runMicroBenchmark(spark,"NDS Q14a subquery",query,1)

+----------+
|ss_item_sk|
+----------+
|      4323|
|      4324|
|      4325|
|      4327|
|      4328|
+----------+
only showing top 5 rows

Retry times : 1, NDS Q14a subquery microbenchmark takes 6.71 seconds
+----------+
|ss_item_sk|
+----------+
|     14103|
|     14104|
|     14105|
|     14107|
|     14108|
+----------+
only showing top 5 rows

Retry times : 2, NDS Q14a subquery microbenchmark takes 6.11 seconds
NDS Q14a subquery microbenchmark takes average 6 seconds after 2 retries


### Liquid clustering
This is a microbenchmark for a 1-million rows crossjoin with itself. You will see about 10x speedups in this query. The mainly improved performance comes from converting BroadcastNestedLoogJoin running on CPU to GpuBroadcastNestedLoogJoin running on GPU.

In [42]:
spark.sql(f"CREATE TABLE delta.`{DATA_ROOT}/store_sales_clustered` USING delta CLUSTER BY (ss_sold_date_sk) AS SELECT * FROM delta.`{DATA_ROOT}/store_sales`")
sql = f"""
OPTIMIZE delta.`{DATA_ROOT}/store_sales_clustered`
"""
print("-"*50)

25/09/29 17:58:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:58:14 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric
25/09/29 17:58:14 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric
25/09/29 17:58:16 WARN GpuOverrides:                                            
! <SerializeFromObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec
  @Expression <Alias> staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).path, true, false, true) AS path#33032 could run on GPU
    !Expression <StaticInvoke> staticinvoke(class org.apache.spark.unsafe.types.UTF8String, St

--------------------------------------------------


In [44]:
# Run microbenchmark with n retry time
runMicroBenchmark(spark,"clustering",sql,1)

25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU
25/09/29 17:58:36 WARN GpuOverrides: Can't replace any part of

+--------------------+--------------------+
|                path|             metrics|
+--------------------+--------------------+
|file:/home/jihoon...|{1, 16, {12463571...|
+--------------------+--------------------+

Retry times : 1, clustering microbenchmark takes 20.67 seconds
clustering microbenchmark takes average 21 seconds after 1 retries


25/09/29 17:58:56 WARN GpuOverrides: 
! <LocalTableScanExec> cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec
  @Expression <AttributeReference> toprettystring(path)#35138 could run on GPU
  @Expression <AttributeReference> toprettystring(metrics)#35139 could run on GPU



In [45]:
spark.stop()