# Transform payments data
1. Extract date and time from payment_timestamp and create new columns payment_date and payment_time
3. Map payment_status codes to descriptive values
3. Write transformed data to the Silver schema

In [0]:
# Load the raw payments table from the Bronze schema and preview it.
BRONZE_PAYMENTS_TABLE = "gizmobox.bronze.py_payments"

df = spark.read.table(BRONZE_PAYMENTS_TABLE)
display(df)

## 1. Extract date and time from payment_timestamp

Documentation: https://learn.microsoft.com/en-us/azure/databricks/sql/language-manual/sql-ref-datetime-pattern

In [0]:
# SQL equivalent, kept for reference:
# %sql
# SELECT
#   payment_id,
#   order_id,
#   payment_timestamp,
#   CAST(date_format(payment_timestamp, 'yyyy-MM-dd') as date) as payment_date,
#   date_format(payment_timestamp, 'HH:mm:ss') as payment_time,
#   payment_status,
#   payment_method
# FROM gizmobox.bronze.payments;

import pyspark.sql.functions as F
import pyspark.sql.types as T

# Cast payment_timestamp to a timestamp once and reuse the expression for
# both derived columns instead of repeating the cast.
payment_ts = F.col('payment_timestamp').cast(T.TimestampType())

df = df.select(
  'payment_id',
  'order_id',
  'payment_timestamp',
  # to_date produces a real DATE column, matching the CAST(... as date) in the
  # SQL reference above; date_format on its own would leave payment_date as a
  # 'yyyy-MM-dd' string.
  F.to_date(payment_ts).alias('payment_date'),
  # payment_time stays a formatted 'HH:mm:ss' string, as in the SQL reference.
  F.date_format(payment_ts, 'HH:mm:ss').alias('payment_time'),
  'payment_status',
  'payment_method'
)

display(df)

## 2. Map payment_status to descriptive values
1 - Success, 2 - Pending, 3 - Canceled, 4 - Failed

In [0]:
# SQL sketch of the same mapping, kept for reference (the CASE result
# replaces payment_status, as the PySpark code below does):
# %sql
# SELECT
#   payment_id,
#   order_id,
#   CAST(date_format(payment_timestamp, 'yyyy-MM-dd') as date) as payment_date,
#   date_format(payment_timestamp, 'HH:mm:ss') as payment_time,
#   case payment_status
#     when 1 then 'success'
#     when 2 then 'pending'
#     when 3 then 'canceled'
#     when 4 then 'failed'
#   end as payment_status
# FROM gizmobox.bronze.payments;

# Translate the numeric status code into a label. There is no otherwise()
# branch, so any code outside 1-4 becomes NULL.
status_code = F.col('payment_status')
status_label = (
    F.when(status_code == 1, 'success')
     .when(status_code == 2, 'pending')
     .when(status_code == 3, 'canceled')
     .when(status_code == 4, 'failed')
)

# Overwrite payment_status with the label and drop payment_timestamp.
df = df.withColumn('payment_status', status_label).drop("payment_timestamp")

display(df)


## 3. Write transformed data to Silver schema

In [0]:
# Create the Silver table from the transformed DataFrame, or replace it if it
# already exists.
(
    df.writeTo('gizmobox.silver.py_payments')
      .createOrReplace()
)

In [0]:
%sql
-- Read back the Silver table to check the written result.
SELECT *
FROM gizmobox.silver.py_payments;