In [0]:
from pyspark.sql.functions import to_timestamp

# ---- Storage locations ----
# Raw input file, the Parquet bronze target, and a CSV copy of bronze
# kept for validation / dashboard use.
raw_path = "s3://customer-seg-project/data.csv"
bronze_path = "s3://customer-seg-project/bronze/"
bronze_csv_path = "s3://customer-seg-project/exports/bronze_csv/"

# ---- Read the raw CSV ----
# The first row supplies the column names; column types are inferred by
# Spark instead of being declared in this cell.
df_raw = spark.read.options(header="true", inferSchema="true").csv(raw_path)

# ---- Bronze frame: event_time as a timestamp ----
# No explicit format is passed, so Spark's default timestamp parsing applies.
df_bronze = df_raw.withColumn("event_time", to_timestamp("event_time"))

# ---- Persist bronze as Parquet ----
# "overwrite" replaces whatever already exists under bronze_path.
df_bronze.write.mode("overwrite").parquet(bronze_path)

# ---- CSV export for validation/dashboard ----
# Same frame, written with a header row; existing export output is replaced.
df_bronze.write.mode("overwrite").option("header", "true").csv(bronze_csv_path)

# ---- Preview the first 10 rows ----
bronze_preview = df_bronze.limit(10)
display(bronze_preview)


event_id,event_time,city,channel,merchant_cat,card_id,amount,is_international,is_chip,is_contactless,label_fraud,customer_age_group,customer_type,key
51a4b773-4166-4241-a41a-1b154fa8cb27,2025-02-20T21:28:17.003Z,Bengaluru,POS,Travel,CARD-93081371,20.10772069625108,False,True,False,False,51+,Regular,Bengaluru|Travel|POS|CARD-93081371
79f5ffb6-dfa6-4303-9bc6-adfbf7fd71ae,2025-01-12T10:43:07.277Z,Hyderabad,Mobile,Fashion,CARD-93243358,13.305304758958258,False,True,False,False,36-50,Regular,Hyderabad|Fashion|Mobile|CARD-93243358
a37e9fe2-61fe-411a-8e58-1bcdb7bb292f,2025-02-14T12:22:18.012Z,Pune,POS,Dining,CARD-81433521,17.46651590665398,False,True,False,False,18-25,Premium,Pune|Dining|POS|CARD-81433521
6b476b16-1fcd-4da3-baea-d3096bf540b0,2025-01-29T15:14:03.452Z,Pune,ATM,Pharmacy,CARD-65919382,4.260341587544124,True,False,True,False,18-25,Occasional,Pune|Pharmacy|ATM|CARD-65919382
574a0089-597f-4d56-87af-0d84164f867d,2025-01-15T12:48:59.240Z,Bengaluru,ECom,Entertainment,CARD-31104788,5.005944230369062,False,True,True,False,26-35,Occasional,Bengaluru|Entertainment|ECom|CARD-31104788
5a4583c3-bfc7-4bde-9107-f1229835e4fc,2025-01-09T22:06:20.733Z,Bengaluru,ECom,Travel,CARD-12752623,7.46477679640081,False,True,False,False,36-50,Regular,Bengaluru|Travel|ECom|CARD-12752623
ea918b72-a379-4be2-8cd1-e76323857a3b,2025-01-05T16:07:51.495Z,Kolkata,ECom,Pharmacy,CARD-33323037,40.4863280428366,False,False,True,False,26-35,Regular,Kolkata|Pharmacy|ECom|CARD-33323037
7d4ee06b-6e44-406f-b2cf-d8c649069ddc,2025-02-10T19:12:45.562Z,Chennai,ATM,Dining,CARD-41090558,6.279995434072502,False,True,True,False,26-35,Premium,Chennai|Dining|ATM|CARD-41090558
944f0cce-33ea-4514-934c-90aa907fec2d,2025-01-21T06:08:51.486Z,Bengaluru,ECom,Grocery,CARD-21758952,1.706596027879325,True,True,False,False,26-35,Occasional,Bengaluru|Grocery|ECom|CARD-21758952
145bca1a-723a-4e1c-9626-c489758ecb95,2025-01-24T04:57:00.279Z,Bengaluru,ECom,BillPay,CARD-74964980,0.1464432478305408,False,False,True,False,26-35,Regular,Bengaluru|BillPay|ECom|CARD-74964980
