In [0]:
filepath = "dbfs:/FileStore/GlobalRetail/bronze_layer/transaction/transaction_snappy.parquet"
df = spark.read.parquet(filepath)

In [0]:
from pyspark.sql.functions import to_timestamp, col
new_df = df.withColumn("transaction_date", to_timestamp(col("transaction_date")))

In [0]:
new_df.printSchema()

root
 |-- transaction_id: string (nullable = true)
 |-- customer_id: integer (nullable = true)
 |-- product_id: integer (nullable = true)
 |-- quantity: integer (nullable = true)
 |-- total_amount: double (nullable = true)
 |-- transaction_date: timestamp (nullable = true)
 |-- payment_method: string (nullable = true)
 |-- store_type: string (nullable = true)



In [0]:
display(new_df)

transaction_id,customer_id,product_id,quantity,total_amount,transaction_date,payment_method,store_type
TRX000001,802,425,1,363.4,2020-07-27T00:00:00.000+0000,Debit Card,Physical Store
TRX000002,858,280,6,758.18,2022-08-10T00:00:00.000+0000,Credit Card,Physical Store
TRX000003,658,694,9,748.66,2020-05-22T00:00:00.000+0000,Bank Transfer,Online
TRX000004,516,930,4,933.78,,Bank Transfer,Physical Store
TRX000005,368,104,10,137.28,2022-06-24T00:00:00.000+0000,PayPal,Physical Store
TRX000006,606,409,6,556.88,2020-10-14T00:00:00.000+0000,Credit Card,Online
TRX000007,535,487,7,246.29,2023-08-19T00:00:00.000+0000,Credit Card,Physical Store
TRX000008,87,630,8,-416.82,2020-04-18T00:00:00.000+0000,Credit Card,Physical Store
TRX000009,383,617,10,405.05,2021-02-13T00:00:00.000+0000,Cash,Physical Store
TRX000010,725,938,1,410.03,2020-03-18T00:00:00.000+0000,PayPal,Online


In [0]:
from pyspark.sql.functions import current_timestamp
final_df = df.withColumn('ingestion_timestamp', current_timestamp())

In [0]:
display(final_df)

transaction_id,customer_id,product_id,quantity,total_amount,transaction_date,payment_method,store_type,ingestion_timestamp
TRX000001,802,425,1,363.4,2020-07-27,Debit Card,Physical Store,2025-02-17T13:01:52.634+0000
TRX000002,858,280,6,758.18,2022-08-10,Credit Card,Physical Store,2025-02-17T13:01:52.634+0000
TRX000003,658,694,9,748.66,2020-05-22,Bank Transfer,Online,2025-02-17T13:01:52.634+0000
TRX000004,516,930,4,933.78,,Bank Transfer,Physical Store,2025-02-17T13:01:52.634+0000
TRX000005,368,104,10,137.28,2022-06-24,PayPal,Physical Store,2025-02-17T13:01:52.634+0000
TRX000006,606,409,6,556.88,2020-10-14,Credit Card,Online,2025-02-17T13:01:52.634+0000
TRX000007,535,487,7,246.29,2023-08-19,Credit Card,Physical Store,2025-02-17T13:01:52.634+0000
TRX000008,87,630,8,-416.82,2020-04-18,Credit Card,Physical Store,2025-02-17T13:01:52.634+0000
TRX000009,383,617,10,405.05,2021-02-13,Cash,Physical Store,2025-02-17T13:01:52.634+0000
TRX000010,725,938,1,410.03,2020-03-18,PayPal,Online,2025-02-17T13:01:52.634+0000


In [0]:
spark.sql("use globalretail_bronze")
final_df.write.format("delta").mode("append").saveAsTable("bronze_transactions")

In [0]:
%sql
select * from bronze_products
limit 10;

brand,category,is_active,name,price,product_id,rating,stock_quantity,ingestion_timestamp
BeautyGlow,Toys,True,Product 1,995.73,1,3.5,989,2025-02-17T13:01:38.209+0000
GardenMaster,Garden,True,Product 2,497.76,2,3.8,495,2025-02-17T13:01:38.209+0000
BeautyGlow,Electronics,True,Product 3,331.63,3,4.6,10,2025-02-17T13:01:38.209+0000
TechPro,Beauty,False,Product 4,798.83,4,4.7,683,2025-02-17T13:01:38.209+0000
HomeSmart,Automotive,False,Product 5,-454.98,5,4.4,719,2025-02-17T13:01:38.209+0000
BookWorm,Electronics,False,Product 6,645.3,6,2.2,823,2025-02-17T13:01:38.209+0000
FashionX,Automotive,False,Product 7,549.08,7,1.1,999,2025-02-17T13:01:38.209+0000
TechPro,Books,False,Product 8,982.36,8,2.4,542,2025-02-17T13:01:38.209+0000
FashionX,Toys,True,Product 9,307.14,9,1.0,671,2025-02-17T13:01:38.209+0000
BeautyGlow,Garden,False,Product 10,871.38,10,3.4,975,2025-02-17T13:01:38.209+0000


In [0]:
import datetime
archive_folder = "dbfs:/FileStore/GlobalRetail/bronze_layer/transaction/archive/"
archive_filepath = archive_folder +'_'+ datetime.datetime.now().strftime("%Y%m%d%H%s")
dbutils.fs.mv(filepath, archive_filepath)


Out[17]: True

In [0]:
print(archive_filepath)

dbfs:/FileStore/GlobalRetail/bronze_layer/transaction/archive/_20250217131739797319
