## Reading tables from the Silver layer for the Gold layer

In [0]:
# Load the two silver-layer tables used by the cells below.
customer_segments_table_df = spark.table("silver.customer_segments")
fraud_flag_table_df = spark.table("silver.fraud_flag")

## Customer Segments Table in Delta Lake
##### Group by customer ID, then reorder the columns and write the result to a table in the gold layer

In [0]:
from pyspark.sql.functions import count, first
from pyspark.sql import functions as F
from pyspark.sql import Window

# Collapse the silver table to one row per customer_id.
# NOTE(review): first() without an explicit ordering returns the value of an
# arbitrary row in each group, so a customer with several segment rows may get
# a different segment from run to run -- confirm that silver.customer_segments
# holds one row per customer, or define which row should win.
customer_segments_result_df = customer_segments_table_df.groupBy("customer_id") \
    .agg(
        first("segment_name").alias("segment_name"),
        first("segment_description").alias("segment_description"),
        first("last_updated_date").alias("last_updated_date")
    )

# Build segment_id from a consecutive, zero-based sequence ordered by customer_id.
# monotonically_increasing_id() only guarantees unique, increasing values: it
# encodes the partition id in the upper bits, so ids jump (e.g. 17179869184) as
# soon as the data spans more than one partition and change with partitioning.
# row_number() over an ordered window is consecutive and reproducible.
# A window without partitionBy moves all rows to a single partition, which is
# acceptable here because the frame holds one row per customer.
segment_sequence = F.row_number().over(Window.orderBy("customer_id")) - 1
customer_segments_result_df = customer_segments_result_df.withColumn(
    'segment_id', F.concat(F.lit("S00"), segment_sequence.cast("string"))
)


# Reorder the columns
ordered_columns = [ 'segment_id','customer_id', 'segment_name', 'segment_description', 'last_updated_date']
df_ordered = customer_segments_result_df.select(ordered_columns)

# Write the DataFrame to a Delta table, replacing any existing contents
df_ordered.write.format("delta").mode("overwrite").saveAsTable("gold.customer_segments")


In [0]:
# Print the first 20 rows as a text table, truncating long cell values
# (these are the defaults of show(), spelled out).
customer_segments_result_df.show(n=20, truncate=True)

+-----------+------------+--------------------+--------------------+----------+
|customer_id|segment_name| segment_description|   last_updated_date|segment_id|
+-----------+------------+--------------------+--------------------+----------+
|      C1001|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S000|
|      C1002|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S001|
|      C1003|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S002|
|      C1004|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S003|
|      C1005|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S004|
|      C1006|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S005|
|      C1007|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S006|
|      C1008|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S007|
|      C1009|       Loyal|Consistent activi...|2024-08-02 09:35:...|      S008|
|      C1010|       Loyal|Consistent act

In [0]:
# Render the reordered customer segments DataFrame as a rich table.
display(df_ordered)

segment_id,customer_id,segment_name,segment_description,last_updated_date
S000,C1001,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S001,C1002,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S002,C1003,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S003,C1004,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S004,C1005,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S005,C1006,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S006,C1007,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S007,C1008,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S008,C1009,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S009,C1010,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000


In [0]:
# Render df_ordered again (this cell repeats the display from the previous cell).
display(df_ordered)


segment_id,customer_id,segment_name,segment_description,last_updated_date
S000,C1001,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S001,C1002,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S002,C1003,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S003,C1004,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S004,C1005,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S005,C1006,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S006,C1007,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S007,C1008,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S008,C1009,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000
S009,C1010,Loyal,Consistent activity for over 5 years,2024-08-02T09:35:30.301+0000


In [0]:
# Save the fraud flag DataFrame as the Delta table gold.fraud_flag,
# overwriting whatever the table currently holds.
(
    fraud_flag_table_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold.fraud_flag")
)


In [0]:
# Render the fraud flag DataFrame as a rich table.
display(fraud_flag_table_df)

flag_id,transaction_id,flag_type,timestamp,confidence_score
F00017179869184,T6068,pattern_anomaly,2024-08-02T09:35:08.513+0000,0.7
F00017179869185,T6139,unusual_amount,2024-08-02T09:35:08.513+0000,0.75
F00017179869186,T6143,pattern_anomaly,2024-08-02T09:35:08.513+0000,0.7
F00017179869187,T6147,new_geolocation,2024-08-02T09:35:08.513+0000,0.8
F00017179869188,T6176,new_geolocation,2024-08-02T09:35:08.513+0000,0.8
F00017179869189,T6180,new_geolocation,2024-08-02T09:35:08.513+0000,0.8
F00017179869190,T6185,unusual_amount,2024-08-02T09:35:08.513+0000,0.75
F00017179869191,T6226,pattern_anomaly,2024-08-02T09:35:08.513+0000,0.7
F00017179869192,T6292,pattern_anomaly,2024-08-02T09:35:08.513+0000,0.7
F00017179869193,T6314,pattern_anomaly,2024-08-02T09:35:08.513+0000,0.7


In [0]:
# Load the merged silver-layer table.
combined_df = spark.table("silver.merge_table")

In [0]:
# Save the merged DataFrame as the Delta table gold.Merge_tbl,
# overwriting whatever the table currently holds.
(
    combined_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold.Merge_tbl")
)

In [0]:
# Render the merged DataFrame as a rich table.
# NOTE(review): the rendered output includes customer_name, email, phone and
# address columns -- confirm the data is synthetic, or clear the cell output
# before sharing this notebook.
display(combined_df)

transaction_id,channel,transaction_type,amount,currency,transaction_timestamp,transaction_status,customer_id,customer_name,email,phone,address,credit_score,join_date,last_update,branch_id,branch_name,branch_location,branch_timezone
T8000,branch,deposit,3.88,USD,2025-01-15T19:41:00.000+0000,completed,C1475,Chase Ellis,chase.ellis@aol.com,(573) 602-0647,"180 Bradley Harbor Suite 169, East Julie, VA 98352",809,2021-08-29,2024-07-26 00:00:00,B0007,North Branch,Richardsmouth,PST
T8001,web,payment,21.34,USD,2025-01-16T03:02:00.000+0000,completed,C1564,Melanie Solis,melanie.solis@outlook.com,(149) 795-0558,"23455 Tara Club Apt. 739, South Lucasville, NC 75728",554,2022-02-09,2024-07-30 00:00:00,B0005,West Branch,New Shawnashire,AEST
T8002,web,transfer,43.89,GBP,2025-01-16T04:39:00.000+0000,completed,C1721,Bradley Robbins,bradley.robbins@gmail.com,(178) 730-0285,"363 Brown Fords Suite 069, South Jamesfort, AS 98431",542,2022-12-28,2024-07-28 00:00:00,B0004,West Branch,North Elizabeth,EST
T8003,mobile,withdrawal,1.0,USD,2025-01-16T16:16:00.000+0000,completed,C1826,Blake Dixon,blake.dixon@aol.com,(126) 466-0935,"860 Peter Lane Apt. 355, Bellport, AK 33748",675,2023-08-13,2024-07-23 00:00:00,B0006,North Branch,Lake Lindsey,GMT
T8004,branch,transfer,314.98,USD,2025-01-16T21:34:00.000+0000,completed,C1604,Henry Brown,henry.brown@gmail.com,(169) 469-4356,"50187 Tonya Circles Suite 246, New Stevenborough, OK 18247",768,2022-04-28,2024-07-28 00:00:00,B0006,North Branch,Lake Lindsey,GMT
T8005,web,payment,136.33,GBP,2025-01-17T00:41:00.000+0000,completed,C1221,Mr. Andrew Novak DDS,mr..dds@gmail.com,(235) 982-3440,"49916 Smith Circle Apt. 064, Lake Douglasville, NJ 37721",780,2020-04-08,2024-07-29 00:00:00,B0008,West Branch,New Robert,AEST
T8006,ATM,deposit,23.27,EUR,2025-01-17T06:43:00.000+0000,completed,C1237,Christopher Smith,christopher.smith@outlook.com,(901) 628-2484,"07006 Beck Creek Apt. 422, Robertland, PW 10007",668,2020-05-08,2024-07-28 00:00:00,B0001,North Branch,Port Luistown,AEST
T8007,ATM,transfer,21.7,GBP,2025-01-17T10:42:00.000+0000,completed,C1705,Crystal Wilson,crystal.wilson@aol.com,(757) 973-6298,"2720 Justin Mission Apt. 025, Michaelmouth, DE 70671",706,2022-11-11,2024-07-25 00:00:00,B0000,Downtown Branch,Jacksonborough,AEST
T8008,web,withdrawal,9.12,EUR,2025-01-17T14:20:00.000+0000,completed,C1055,Elizabeth Franklin,elizabeth.franklin@gmail.com,(153) 052-2793,"899 Alvarez Radial Apt. 171, Hillfort, IN 39249",837,2019-04-29,2024-07-21 00:00:00,B0008,West Branch,New Robert,AEST
T8009,branch,withdrawal,123.93,GBP,2025-01-17T22:08:00.000+0000,completed,C1962,Cathy Larson,cathy.larson@yahoo.com,(997) 568-9516,"USNV Gonzalez, FPO AP 47526",553,2024-05-19,2024-07-20 00:00:00,B0001,North Branch,Port Luistown,AEST
