## Reading tables from the silver layer to build the gold layer

In [0]:
# Load the two silver-layer tables that the cells below publish to the gold layer.
fraud_flag_df = spark.table("silver.fraud_flag")
customer_segments_df = spark.table("silver.customer_segments")

## Customer Segments Table in Delta Lake

In [0]:
from pyspark.sql.functions import count, first
from pyspark.sql import functions as F

from pyspark.sql import Window

# Collapse silver.customer_segments to exactly one row per customer.
# The most recently updated segment row wins: max() over a struct compares
# last_updated_date first, then segment_name / segment_description as
# tie-breakers, so the selected row is the same on every run (unlike first(),
# whose result depends on row order after the shuffle).
latest_segment = F.max(
    F.struct("last_updated_date", "segment_name", "segment_description")
).alias("latest_segment")

customer_segments_latest_df = (
    customer_segments_df
    .groupBy("customer_id")
    .agg(latest_segment)
    .select(
        "customer_id",
        F.col("latest_segment.segment_name").alias("segment_name"),
        F.col("latest_segment.segment_description").alias("segment_description"),
        F.col("latest_segment.last_updated_date").alias("last_updated_date"),
    )
)

# Surrogate key "S00<n>", zero-based and assigned in customer_id order.
# row_number() yields consecutive values that stay the same every time the lazy
# plan is re-evaluated (write, show, display), whereas
# monotonically_increasing_id() is neither consecutive nor stable.
# An un-partitioned window moves all rows to a single partition; that is
# acceptable here because the input is already reduced to one row per customer.
segment_order = Window.orderBy("customer_id")
customer_segments_result_df = customer_segments_latest_df.withColumn(
    "segment_id",
    F.concat(
        F.lit("S00"),
        (F.row_number().over(segment_order) - 1).cast("string"),
    ),
)

# Reorder the columns so the surrogate key comes first.
ordered_columns = ['segment_id', 'customer_id', 'segment_name', 'segment_description', 'last_updated_date']
df_ordered = customer_segments_result_df.select(ordered_columns)

# Write the DataFrame to a Delta table, replacing any previous contents.
df_ordered.write.format("delta").mode("overwrite").saveAsTable("gold.customer_segments")


In [0]:
# Print a plain-text preview of the per-customer segment rows, including the
# generated segment_id column (long values are truncated in this view).
customer_segments_result_df.show()

+-----------+------------+--------------------+--------------------+----------+
|customer_id|segment_name| segment_description|   last_updated_date|segment_id|
+-----------+------------+--------------------+--------------------+----------+
|      C1000|       Loyal|Consistent activi...|2024-07-31 13:17:...|      S000|
|      C1001|       Loyal|Consistent activi...|2024-07-31 13:17:...|      S001|
|      C1002| Credit_Risk|Customers with lo...|2024-07-31 13:17:...|      S002|
|      C1003|       Loyal|Consistent activi...|2024-07-31 13:17:...|      S003|
|      C1004| Credit_Risk|Customers with lo...|2024-07-31 13:17:...|      S004|
|      C1005|       Loyal|Consistent activi...|2024-07-31 13:17:...|      S005|
|      C1006|       Loyal|Consistent activi...|2024-07-31 13:17:...|      S006|
|      C1007|       Loyal|Consistent activi...|2024-07-31 13:17:...|      S007|
|      C1008|       Loyal|Consistent activi...|2024-07-31 13:17:...|      S008|
|      C1009|       Loyal|Consistent act

In [0]:
# Render the column-ordered DataFrame, i.e. the same selection that was saved
# to gold.customer_segments.
df_ordered.display()

segment_id,customer_id,segment_name,segment_description,last_updated_date
S000,C1000,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S001,C1001,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S002,C1002,Credit_Risk,Customers with low credit scores,2024-07-31T13:17:26.514+0000
S003,C1003,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S004,C1004,Credit_Risk,Customers with low credit scores,2024-07-31T13:17:26.514+0000
S005,C1005,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S006,C1006,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S007,C1007,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S008,C1008,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S009,C1009,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000


In [0]:
# NOTE(review): this cell repeats the df_ordered.display() call from the
# previous cell; consider removing one of the two.
df_ordered.display()


segment_id,customer_id,segment_name,segment_description,last_updated_date
S000,C1000,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S001,C1001,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S002,C1002,Credit_Risk,Customers with low credit scores,2024-07-31T13:17:26.514+0000
S003,C1003,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S004,C1004,Credit_Risk,Customers with low credit scores,2024-07-31T13:17:26.514+0000
S005,C1005,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S006,C1006,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S007,C1007,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S008,C1008,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000
S009,C1009,Loyal,Consistent activity for over 5 years,2024-07-31T13:17:26.514+0000


In [0]:
# Publish the silver fraud flags to the gold layer without transformation,
# overwriting the gold.fraud_flag Delta table.
# NOTE(review): the preview of this DataFrame shows consecutive flag_ids that
# share the same transaction_id, flag_type, timestamp and confidence_score —
# confirm whether these repeats are expected before they reach gold.
fraud_flag_df.write.saveAsTable("gold.fraud_flag", format="delta", mode="overwrite")


In [0]:
# Render the fraud-flag rows read from silver.fraud_flag.
fraud_flag_df.display()

flag_id,transaction_id,flag_type,timestamp,confidence_score
F0000,T8322,pattern_anomaly,2024-07-31T13:17:04.711+0000,0.7
F0001,T8322,pattern_anomaly,2024-07-31T13:17:04.711+0000,0.7
F0002,T5232,new_geolocation,2024-07-31T13:17:04.711+0000,0.8
F0003,T5232,new_geolocation,2024-07-31T13:17:04.711+0000,0.8
F0004,T7981,new_geolocation,2024-07-31T13:17:04.711+0000,0.8
F0005,T7981,new_geolocation,2024-07-31T13:17:04.711+0000,0.8
F0006,T6862,new_geolocation,2024-07-31T13:17:04.711+0000,0.8
F0007,T6205,new_geolocation,2024-07-31T13:17:04.711+0000,0.8
F0008,T6862,new_geolocation,2024-07-31T13:17:04.711+0000,0.8
F0009,T6205,new_geolocation,2024-07-31T13:17:04.711+0000,0.8


In [0]:
# Load the merged silver table (transaction, customer and branch columns).
merg_df = spark.table("silver.merge_table")

In [0]:
# Publish the merged table to the gold layer without transformation,
# overwriting the gold.merge_tbl Delta table.
# NOTE(review): the preview of this DataFrame contains fully identical repeated
# rows (for example transaction T5235 appears twice) — confirm whether they
# should be de-duplicated before publishing to gold.
merg_df.write.saveAsTable("gold.merge_tbl", format="delta", mode="overwrite")

In [0]:
# Render the merged rows read from silver.merge_table.
# NOTE(review): the rendered output includes customer names, e-mail addresses,
# phone numbers and postal addresses — confirm the data is synthetic or clear
# the output before sharing the notebook.
merg_df.display()

transaction_id,channel,transaction_type,amount,currency,transaction_timestamp,transaction_status,customer_id,customer_name,email,phone,address,credit_score,join_date,last_update,branch_id,branch_name,branch_location,branch_timezone
T5235,ATM,payment,26.81,GBP,2023-02-26T23:11:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0006,Downtown Branch,Oliverstad,EST
T5235,ATM,payment,26.81,GBP,2023-02-26T23:11:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0006,Downtown Branch,Oliverstad,EST
T7486,ATM,payment,2.92,EUR,2024-09-08T16:11:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0002,Downtown Branch,East Jonathanburgh,EST
T7266,branch,withdrawal,61246.35,EUR,2024-07-15T08:28:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0004,Central Branch,Port Martinton,EST
T7129,mobile,payment,14.67,EUR,2024-06-12T11:32:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0009,Downtown Branch,North Lisafort,AEST
T7486,ATM,payment,2.92,EUR,2024-09-08T16:11:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0002,Downtown Branch,East Jonathanburgh,EST
T7266,branch,withdrawal,61246.35,EUR,2024-07-15T08:28:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0004,Central Branch,Port Martinton,EST
T7129,mobile,payment,14.67,EUR,2024-06-12T11:32:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0009,Downtown Branch,North Lisafort,AEST
T6591,branch,transfer,14.6,GBP,2024-01-28T05:13:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0006,Downtown Branch,Oliverstad,EST
T6591,branch,transfer,14.6,GBP,2024-01-28T05:13:00.000+0000,completed,C1000,Phillip Hernandez,phillip.hernandez@hotmail.com,(001) 861-4705,"95351 Dustin Manor Apt. 262, South Robertmouth, VA 05601",583,2019-01-01,2024-07-24 00:00:00,B0006,Downtown Branch,Oliverstad,EST
