In [1]:
from pyspark.sql import SparkSession

# Root directory (as a file:// URI) that Spark uses as its SQL warehouse.
warehouse_path = "file:///C:/tmp/spark_warehouse"
# Windows path to the PostgreSQL JDBC driver jar. A raw string is used so the
# backslash is kept literally instead of being parsed as the (invalid) escape
# sequence "\p", which newer Python versions warn about.
# NOTE(review): this name is not referenced in the visible part of the cell —
# confirm whether it is still needed (e.g. for spark.jars).
postgres_driver_path = r"C:\postgresql-42.7.5.jar"

# Build (or reuse) a local SparkSession with the Hudi bundle and extensions.
# Settings are applied in the listed order, one .config() call per entry.
_session_settings = [
    ("spark.jars.packages", "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.14.0"),
    ("spark.serializer", "org.apache.spark.serializer.KryoSerializer"),
    ("spark.sql.hive.convertMetastoreParquet", "false"),
    ("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension"),
    ("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.hudi.catalog.HoodieCatalog"),
    ("spark.driver.memory", "4g"),
    ("spark.executor.memory", "4g"),
    ("spark.executor.memoryOverhead", "1g"),
    ("spark.driver.memoryOverhead", "1g"),
    ("spark.sql.warehouse.dir", warehouse_path),
]

_session_builder = SparkSession.builder.master("local[*]").appName("Hudi Batch Write")
for _setting_key, _setting_value in _session_settings:
    _session_builder = _session_builder.config(_setting_key, _setting_value)

spark = _session_builder.getOrCreate()

# Load the Hudi table stored under the warehouse directory.
# NOTE: verify that "from_kafka" is the correct table folder name.
read_df = (
    spark.read
    .format("hudi")
    .load(f"{warehouse_path}/from_kafka")
)

# Print the first rows of the table.
read_df.show()

# Report how many records the table contains.
print(f"Total records: {read_df.count()}")

# Print the column names and types.
read_df.printSchema()

+-------------------+--------------------+------------------+----------------------+--------------------+------+--------+----------+--------------------+-----+-------------+------------------+------+----------------+---------+--------------------+-----+---------+----------+---------------+-----------+------------+------+-------------+--------------------+----------+----------------+----------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name|    id|semester|      code|         description|units|instructor_id|   instructor_name|srcode|        fullname|   campus|             program|major|yearlevel|curriculum|  class_section|grade_final|grade_reexam|status|grade_numeric|grade_classification|start_year|        year_sem|program_id|     processing_time|schoolyear|
+-------------------+--------------------+------------------+----------------------+--------------------+------+--------+----------+----------