In [None]:
import os
from pyspark.sql import SparkSession

# Build the local Spark session used by every cell below. All builder options
# are applied in one chained expression so that `spark` only ever refers to
# the finished SparkSession, never to the intermediate builder.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("hudi_read")
    # Hudi integration: Kryo serializer, Hudi catalog and Hudi SQL extension.
    .config('spark.serializer', 'org.apache.spark.serializer.KryoSerializer')
    .config('spark.sql.catalog.spark_catalog', 'org.apache.spark.sql.hudi.catalog.HoodieCatalog')
    .config('spark.sql.extensions', 'org.apache.spark.sql.hudi.HoodieSparkSessionExtension')
    .enableHiveSupport()
    # Parquet / SQL settings pinned explicitly; background in
    # https://github.com/apache/hudi/pull/8082
    .config("spark.sql.legacy.parquet.nanosAsLong", "false")
    .config("spark.sql.parquet.binaryAsString", "false")
    .config("spark.sql.parquet.int96AsTimestamp", "true")
    .config("spark.sql.caseSensitive", "false")
    # Reuses an already-running session if one exists in this kernel.
    .getOrCreate()
)

In [None]:
# Resolve the table location relative to the notebook's current working
# directory (abspath('') expands to the cwd).
abspath = os.path.abspath('')
tablePath = f"{abspath}/hudi-warehouse/hudi_trips_cow"

# Register a Hudi table named hudi_trips_cow backed by that location, unless a
# table with this name is already present in the catalog.
create_table_sql = f"create table if not exists hudi_trips_cow using hudi location '{tablePath}';"
spark.sql(create_table_sql)

# Confirm the table is visible in the catalog.
spark.sql("show tables").show()

# Optional diagnostics -- uncomment as needed:
# spark.sql("show partitions hudi_trips_cow").show(truncate=False)
# spark.sql("show current schema").show()
# spark.sql("show create table hudi_trips_cow").show(truncate=False)

In [None]:
# Read every row of the table and render it as a pandas DataFrame.
# NOTE: toPandas() collects the full result onto the driver.
all_trips = spark.sql("select * from hudi_trips_cow")
all_trips.toPandas()

In [None]:
# test that primary key works
#
# Re-insert a row that is already in the table (same uuid, same partition) and
# then read it back. If the record key is enforced on write, the read-back is
# expected to show a single row rather than a duplicate -- NOTE(review): this
# depends on the table's / Hudi version's insert operation settings; confirm.
existing_row_filter = "partitionpath='asia/india/chennai' and uuid = 'bbfc3870-122a-45d2-a83a-18ceb4d5f8be'"

# spark.sql() runs the INSERT eagerly; its (empty) result is intentionally not
# collected, since a non-final expression in a cell is never displayed anyway.
spark.sql(f"insert into hudi_trips_cow select * from hudi_trips_cow where {existing_row_filter};")

# Last expression of the cell: the matching row(s), rendered as pandas.
spark.sql(f"select * from hudi_trips_cow where {existing_row_filter};").toPandas()

In [None]:
# test that global index works
#
# Insert a row that reuses an existing uuid but names a different partition
# path ('asia/india/chennai_new'), then look the uuid up across all partitions.
# NOTE(review): with a global index this is presumably expected to return a
# single row -- confirm against the table's index configuration.
insert_moved_row_sql = (
    "insert into hudi_trips_cow select "
    "0.1111 as begin_lat, "
    "0.564409 as begin_lon, "
    "'driver-213' as driver, "
    "0.798706 as end_lat, "
    "0.026984 as end_lon, "
    "17.851135 as fare, "
    "'rider-213' as rider, "
    "1681877020987 as ts, "
    "'bbfc3870-122a-45d2-a83a-18ceb4d5f8be' as uuid, "
    "'asia/india/chennai_new' as partitionpath;"
)
spark.sql(insert_moved_row_sql).show()

# Last expression of the cell: every row carrying that uuid, in any partition.
lookup_by_uuid_sql = "select * from hudi_trips_cow where uuid = 'bbfc3870-122a-45d2-a83a-18ceb4d5f8be';"
spark.sql(lookup_by_uuid_sql).toPandas()