In [1]:
from pyspark.sql import SparkSession

# Build (or reuse) a SparkSession with Hive support enabled, so that
# saveAsTable() below registers the table in the Hive metastore.
spark = (
    SparkSession.builder
    .appName("PySpark-Hive-Integration")
    .enableHiveSupport()
    .getOrCreate()
)

# Everything that uses the session runs inside try/finally so the session is
# stopped even when DataFrame creation or the Hive write raises.
try:
    # Create a sample DataFrame; column types are inferred from the tuples.
    data = [
        ("Alice", 25, "New York"),
        ("Bob", 30, "London"),
        ("Charlie", 35, "Paris"),
    ]
    columns = ["name", "age", "city"]
    df = spark.createDataFrame(data, columns)

    # Print the DataFrame schema and data
    print("DataFrame schema:")
    df.printSchema()
    print("DataFrame content:")
    df.show()

    # Write the DataFrame to a Hive table.
    # mode("overwrite") replaces the table's contents if it already exists.
    # saveAsTable() without an explicit path creates a managed table.
    hive_table_name = "people_table"
    df.write.mode("overwrite").saveAsTable(hive_table_name)

    print(f"Data has been successfully written to Hive table '{hive_table_name}'.")
finally:
    # Always release the Spark session, on success and on failure alike.
    spark.stop()

DataFrame schema:
root
 |-- name: string (nullable = true)
 |-- age: long (nullable = true)
 |-- city: string (nullable = true)

DataFrame content:
+-------+---+--------+
|   name|age|    city|
+-------+---+--------+
|  Alice| 25|New York|
|    Bob| 30|  London|
|Charlie| 35|   Paris|
+-------+---+--------+

Data has been successfully written to Hive table 'people_table'.
