## Reading tables from the silver layer to publish to the gold layer

In [0]:
# Load the two silver-layer inputs used by the cells below.
silver_reader = spark.read
fraud_flag_df = silver_reader.table("silver.fraud_flag")
customer_segments_df = silver_reader.table("silver.customer_segments")

## Customer Segments Table in Delta Lake

In [0]:
from pyspark.sql.functions import count, first
from pyspark.sql import functions as F
from pyspark.sql import Window

# Build gold.customer_segments: exactly one row per customer_id, plus a
# generated segment_id surrogate key.

# Pick ONE whole row per customer with an explicit ordering. The previous
# groupBy + first() picked an arbitrary value per column (first() depends on
# row order, which is not guaranteed after a shuffle), so the result could
# change between runs and could even mix columns from different source rows.
# NOTE(review): assumes the most recently updated row is the one to keep;
# segment_name / segment_description only break ties deterministically —
# confirm this matches the business rule.
latest_per_customer = Window.partitionBy("customer_id").orderBy(
    F.col("last_updated_date").desc(),
    F.col("segment_name"),
    F.col("segment_description"),
)

customer_segments_latest_df = (
    customer_segments_df
    .withColumn("_row_rank", F.row_number().over(latest_per_customer))
    .filter(F.col("_row_rank") == 1)
    .select("customer_id", "segment_name", "segment_description", "last_updated_date")
)

# Generate segment_id as "S" + zero-padded, zero-based, consecutive number
# (S000, S001, ...). monotonically_increasing_id() is unique but NOT
# consecutive (it encodes the partition id in the upper bits) and is
# re-evaluated on every action, so ids could be huge, unevenly padded
# ("S009" then "S0010") and differ between the write and a later display.
# An un-partitioned window moves all rows to a single partition; that is
# acceptable here because the frame holds one row per customer.
segment_order = Window.orderBy("customer_id")
customer_segments_result_df = customer_segments_latest_df.withColumn(
    "segment_id",
    F.format_string("S%03d", F.row_number().over(segment_order) - 1),
)

# Reorder the columns
ordered_columns = ['segment_id', 'customer_id', 'segment_name', 'segment_description', 'last_updated_date']
df_ordered = customer_segments_result_df.select(ordered_columns)

# Write the DataFrame to a Delta table (replaces the existing table contents)
df_ordered.write.format("delta").mode("overwrite").saveAsTable("gold.customer_segments")


In [0]:
# Plain-text preview of the per-customer segment frame (before column
# reordering, so segment_id appears as the last column).
customer_segments_result_df.show()

+-----------+------------+--------------------+--------------------+----------+
|customer_id|segment_name| segment_description|   last_updated_date|segment_id|
+-----------+------------+--------------------+--------------------+----------+
|      C1000|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S000|
|      C1001|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S001|
|      C1002|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S002|
|      C1003|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S003|
|      C1004|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S004|
|      C1005|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S005|
|      C1006|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S006|
|      C1007|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S007|
|      C1008|       Loyal|Consistent activi...|2024-07-31 14:01:...|      S008|
|      C1010|       Loyal|Consistent act

In [0]:
# Render the column-ordered frame — the same DataFrame that was written to
# gold.customer_segments — as a table in the notebook.
df_ordered.display()

segment_id,customer_id,segment_name,segment_description,last_updated_date
S000,C1000,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S001,C1001,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S002,C1002,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S003,C1003,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S004,C1004,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S005,C1005,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S006,C1006,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S007,C1007,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S008,C1008,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S009,C1010,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000


In [0]:
# Verify the write by reading the persisted gold table back, instead of
# re-displaying the in-memory DataFrame a second time (the previous cell
# already shows it, and recomputing it does not prove what was saved).
spark.read.table("gold.customer_segments").display()


segment_id,customer_id,segment_name,segment_description,last_updated_date
S000,C1000,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S001,C1001,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S002,C1002,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S003,C1003,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S004,C1004,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S005,C1005,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S006,C1006,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S007,C1007,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S008,C1008,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000
S009,C1010,Loyal,Consistent activity for over 5 years,2024-07-31T14:01:48.024+0000


In [0]:
# Publish the silver fraud flags to the gold layer unchanged, replacing any
# existing contents of gold.fraud_flag.
(
    fraud_flag_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold.fraud_flag")
)


In [0]:
# Render the fraud-flag rows read from silver.fraud_flag (the same frame that
# is written to gold.fraud_flag) as a table in the notebook.
fraud_flag_df.display()

flag_id,transaction_id,flag_type,timestamp,confidence_score
F0008589934592,T7007,pattern_anomaly,2024-07-31T14:01:27.802+0000,0.7
F0008589934593,T7047,pattern_anomaly,2024-07-31T14:01:27.802+0000,0.7
F0008589934594,T7085,pattern_anomaly,2024-07-31T14:01:27.802+0000,0.7
F0008589934595,T7111,pattern_anomaly,2024-07-31T14:01:27.802+0000,0.7
F0008589934596,T7115,unusual_amount,2024-07-31T14:01:27.802+0000,0.75
F0008589934597,T7143,new_geolocation,2024-07-31T14:01:27.802+0000,0.8
F0008589934598,T7173,pattern_anomaly,2024-07-31T14:01:27.802+0000,0.7
F0008589934599,T7182,pattern_anomaly,2024-07-31T14:01:27.802+0000,0.7
F0008589934600,T7260,pattern_anomaly,2024-07-31T14:01:27.802+0000,0.7
F0008589934601,T7277,new_geolocation,2024-07-31T14:01:27.802+0000,0.8


In [0]:
# Load the merged silver table that is published to gold in the next cell.
merge_source_table = "silver.merge_table"
merg_df = spark.read.table(merge_source_table)

In [0]:
# Publish the merged table to the gold layer as a Delta table, replacing any
# existing contents of gold.merge_tbl.
merge_writer = merg_df.write.format("delta").mode("overwrite")
merge_writer.saveAsTable("gold.merge_tbl")

In [0]:
# Render the merged frame read from silver.merge_table as a table in the notebook.
# NOTE(review): the rendered output includes customer_name, email, phone and
# address columns — confirm the data is synthetic, or clear/redact the cell
# output before sharing or committing this notebook.
merg_df.display()

transaction_id,channel,transaction_type,amount,currency,transaction_timestamp,transaction_status,customer_id,customer_name,email,phone,address,credit_score,join_date,last_update,branch_id,branch_name,branch_location,branch_timezone
T8000,mobile,deposit,85.67,EUR,2025-01-31T12:54:00.000+0000,pending,C1132,Rebecca Manning,rebecca.manning@aol.com,(653) 635-5660,"Unit 2963 Box 1023, DPO AE 75681",761,2019-10-05,2024-07-28 00:00:00,B0008,Central Branch,West Sandrachester,AEST
T8001,mobile,payment,22.78,USD,2025-01-31T15:04:00.000+0000,pending,C1979,Nicole Owens,nicole.owens@outlook.com,(653) 347-8022,"12603 Bird Keys Suite 438, Lake Brittany, NM 18496",830,2024-06-16,2024-07-26 00:00:00,B0005,East Branch,Hillside,PST
T8002,mobile,transfer,6.65,GBP,2025-01-31T18:03:00.000+0000,completed,C1997,Timothy Hill,timothy.hill@gmail.com,(285) 968-6159,"71758 Rachel Mill, Port Kevin, ID 18244",754,2024-07-16,2024-07-21 00:00:00,B0007,West Branch,East Melindahaven,AEST
T8003,mobile,withdrawal,3.35,EUR,2025-02-01T03:11:00.000+0000,completed,C1295,Kelsey Parker,kelsey.parker@outlook.com,(001) 783-9124,"9984 Bates Center, Amberhaven, TX 29506",555,2020-07-17,2024-07-21 00:00:00,B0008,Central Branch,West Sandrachester,AEST
T8004,branch,withdrawal,83.96,GBP,2025-02-01T08:22:00.000+0000,completed,C1791,Michael Levine,michael.levine@aol.com,(125) 345-3823,"85128 Gina Forest, Michaelton, AR 63167",825,2023-05-07,2024-07-29 00:00:00,B0003,Central Branch,East Crystalberg,GMT
T8005,branch,payment,108.42,EUR,2025-02-01T10:44:00.000+0000,completed,C1957,Jay Sanchez,jay.sanchez@aol.com,(724) 700-5663,"2475 Thompson Viaduct Apt. 129, Burtonfurt, ME 83398",711,2024-04-09,2024-07-29 00:00:00,B0002,North Branch,Johnsonbury,PST
T8006,mobile,payment,17202.33,GBP,2025-02-01T21:34:00.000+0000,completed,C1234,Dr. Kristina Foley,dr..foley@yahoo.com,(316) 636-1829,"9620 Carlson Villages, Lisaview, VA 04046",650,2020-03-18,2024-07-24 00:00:00,B0008,Central Branch,West Sandrachester,AEST
T8007,mobile,payment,64.11,EUR,2025-02-02T06:43:00.000+0000,completed,C1400,Amanda Mercado,amanda.mercado@hotmail.com,(591) 997-7914,"160 John Alley Apt. 761, East Aliciafort, WI 27481",770,2021-01-29,2024-07-28 00:00:00,B0008,Central Branch,West Sandrachester,AEST
T8008,web,withdrawal,6.98,GBP,2025-02-02T18:42:00.000+0000,pending,C1594,Linda Mclaughlin,linda.mclaughlin@hotmail.com,(247) 305-5434,"711 Daniel Pines Suite 471, Williamtown, FM 58477",780,2022-03-03,2024-07-31 00:00:00,B0000,West Branch,Hillside,AEST
T8009,ATM,deposit,11.29,EUR,2025-02-02T22:00:00.000+0000,completed,C1714,Kayla York,kayla.york@gmail.com,(846) 916-6355,"1121 Victor Rapid Apt. 978, East Allison, HI 82055",587,2022-11-24,2024-07-20 00:00:00,B0009,West Branch,Jessicaborough,PST
