In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType
import datetime

# Obtain the Spark session for this notebook (reuses an active session if one exists).
spark = (
    SparkSession.builder
    .appName("CustomerData")
    .getOrCreate()
)


In [0]:
# Make sure the target schema is present before any table is written to it.
# The 'retail' catalog itself is assumed to exist already in Unity Catalog.
create_schema_sql = "CREATE SCHEMA IF NOT EXISTS retail.customer_data"
spark.sql(create_schema_sql)


DataFrame[]

In [0]:

# Explicit schema for the customers table: only customer_id is non-nullable.
customer_schema = StructType(
    [
        StructField(name="customer_id", dataType=IntegerType(), nullable=False),
        StructField(name="name", dataType=StringType(), nullable=True),
        StructField(name="region", dataType=StringType(), nullable=True),
        StructField(name="status", dataType=StringType(), nullable=True),
        StructField(name="signup_date", dataType=DateType(), nullable=True),
    ]
)

# Sample rows in schema column order; the ISO date strings are parsed into
# datetime.date objects so they match the DateType signup_date column.
customer_data = [
    (cust_id, full_name, area, state, datetime.date.fromisoformat(signed_up))
    for cust_id, full_name, area, state, signed_up in (
        (1, "Alice Johnson", "West", "Active", "2023-01-15"),
        (2, "Bob Smith", "East", "Inactive", "2023-02-20"),
        (3, "Charlie Lee", "West", "Active", "2023-03-10"),
    )
]


In [0]:

# Materialise the sample rows as a DataFrame, applying the explicit schema
# instead of relying on type inference.
customers_df = spark.createDataFrame(
    data=customer_data,
    schema=customer_schema,
)

# Print the full contents as a plain-text table (no column truncation).
print("Initial Customer Data:")
customers_df.show(truncate=False)

# Persist the DataFrame as a Delta table; "overwrite" mode replaces any
# existing contents of the table on each run.
(
    customers_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("retail.customer_data.customers")
)

# Read back from the saved table: active customers located in the 'West' region.
active_west_sql = """
    SELECT customer_id, name, region
    FROM retail.customer_data.customers
    WHERE status = 'Active' AND region = 'West'
"""
result_df = spark.sql(active_west_sql)

print("Active Customers in West Region:")
result_df.show(truncate=False)


Initial Customer Data:
+-----------+-------------+------+--------+-----------+
|customer_id|name         |region|status  |signup_date|
+-----------+-------------+------+--------+-----------+
|1          |Alice Johnson|West  |Active  |2023-01-15 |
|2          |Bob Smith    |East  |Inactive|2023-02-20 |
|3          |Charlie Lee  |West  |Active  |2023-03-10 |
+-----------+-------------+------+--------+-----------+

Active Customers in West Region:
+-----------+-------------+------+
|customer_id|name         |region|
+-----------+-------------+------+
|1          |Alice Johnson|West  |
|3          |Charlie Lee  |West  |
+-----------+-------------+------+

