In [0]:
from pyspark.sql import functions as F
from pyspark.sql.functions import when, col, regexp_replace

# Storage locations: Bronze input, Silver Delta output, and Silver CSV export.
bronze_path = "s3://customer-seg-project/bronze/"
silver_path = "s3://customer-seg-project/silver_delta/"
silver_csv_path = "s3://customer-seg-project/exports/silver_csv/"

# Read the Bronze layer, which is stored as Parquet.
bronze_reader = spark.read
df_bronze = bronze_reader.parquet(bronze_path)

# Transform Data
def bool_to_int(column):
    """Build a Column expression that re-encodes a "True"/"False" column as 1/0.

    Args:
        column: Name of the DataFrame column to convert.

    Returns:
        A Column that evaluates to 1 where the source column equals "True",
        0 where it equals "False", and null for every other value.
    """
    source = col(column)
    is_true = source == "True"
    is_false = source == "False"
    # Anything that is neither "True" nor "False" falls through to null.
    return when(is_true, 1).when(is_false, 0).otherwise(None)

# Strip comma characters from "amount" (presumably thousands separators)
# and cast the result to double.
amount_clean = regexp_replace(col("amount"), ",", "").cast("double")
df_silver = df_bronze.withColumn("amount", amount_clean)

# Re-encode each flag column as 1/0 using bool_to_int, in a fixed order.
for flag_name in ("is_international", "is_chip", "is_contactless", "label_fraud"):
    df_silver = df_silver.withColumn(flag_name, bool_to_int(flag_name))

# Persist the Silver data in Delta format, replacing whatever is at the path.
df_silver.write.format("delta").mode("overwrite").save(silver_path)

# Export the same DataFrame as CSV with a header row, also overwriting.
df_silver.write.mode("overwrite").option("header", "true").csv(silver_csv_path)

# Show the first ten rows in the notebook.
display(df_silver.limit(10))


event_id,event_time,city,channel,merchant_cat,card_id,amount,is_international,is_chip,is_contactless,label_fraud,customer_age_group,customer_type,key
c344b966-863c-4fa9-b8b4-f9a09670f912,2025-02-17T22:22:32.038Z,Hyderabad,POS,Pharmacy,CARD-20242172,37.0591067428691,0,1,0,0,26-35,Regular,Hyderabad|Pharmacy|POS|CARD-20242172
a6b54e2e-45bb-4582-ab08-bac2836b7553,2025-01-22T03:06:19.126Z,Mumbai,ECom,Grocery,CARD-48810182,1.4939954025967166,0,0,1,0,26-35,Premium,Mumbai|Grocery|ECom|CARD-48810182
b1563ece-9c17-4011-836c-cc45d3d44a11,2025-02-14T14:41:56.097Z,Bengaluru,POS,Grocery,CARD-00430808,38.87808966108312,0,0,0,0,26-35,Regular,Bengaluru|Grocery|POS|CARD-00430808
bfb557b9-68b9-449b-810a-83b92bb8509b,2025-01-06T10:11:36.013Z,Chennai,ECom,BillPay,CARD-89785952,1.510066339377056,0,1,0,0,26-35,Occasional,Chennai|BillPay|ECom|CARD-89785952
e461c6c2-a3dc-4ac8-80f6-22f5d064eec1,2025-01-10T05:20:03.917Z,Bengaluru,POS,Dining,CARD-79336287,4.38135980294852,0,1,1,0,26-35,Regular,Bengaluru|Dining|POS|CARD-79336287
0016f7ed-7735-41c4-968f-3a7ba5db9710,2025-02-01T04:53:37.188Z,Delhi,ATM,Pharmacy,CARD-36952056,42.79731810752476,0,1,1,0,36-50,Occasional,Delhi|Pharmacy|ATM|CARD-36952056
5f48a9b2-da12-4ec7-908c-c17b0d4f2f31,2025-01-17T14:43:26.690Z,Chennai,ECom,Entertainment,CARD-86285043,5.346788055811756,0,1,0,0,18-25,Regular,Chennai|Entertainment|ECom|CARD-86285043
28b268a0-5c53-43bf-8e48-6dd2472dc070,2025-01-20T13:42:47.472Z,Hyderabad,Mobile,Entertainment,CARD-59487284,42.22887903168006,0,0,1,0,18-25,Occasional,Hyderabad|Entertainment|Mobile|CARD-59487284
99c18104-99a2-49d8-ad2b-7735e6ff6e7b,2025-02-24T08:24:59.414Z,Mumbai,POS,Electronics,CARD-59426022,29.60361843508756,1,1,0,0,26-35,Premium,Mumbai|Electronics|POS|CARD-59426022
44b81a87-b8a3-44fe-b2ed-73eb498a93f3,2025-02-14T22:53:37.620Z,Chennai,ECom,Grocery,CARD-10782201,2.0139124825944235,0,1,1,0,18-25,Occasional,Chennai|Grocery|ECom|CARD-10782201
