# Extract data from the payments files
1. List the files from payment folder
2. Create payments table in Bronze layer

## 1. List the files from payment folder

In [0]:
# Declare the column names and types up front as a DDL-formatted string,
# which is handed to the reader below.
payments_schema = '''
    payment_id INTEGER,
    order_id INTEGER,
    payment_timestamp TIMESTAMP,
    payment_status INTEGER,
    payment_method STRING
'''

# Configure the CSV reader first, then point it at the payments folder.
payments_reader = (
    spark.read
    .format('csv')
    .option('delimiter', ',')
    .schema(payments_schema)
)
df = payments_reader.load(
    'abfss://gizmobox@maideacourseextdl.dfs.core.windows.net/landing/external_data/payments'
)

# Render the resulting DataFrame in the notebook.
display(df)

In [0]:
# Alternative: describe the schema with PySpark type objects instead of a
# DDL string.
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, TimestampType

# (column name, column type) pairs, in file column order.
payment_columns = [
    ('payment_id', IntegerType()),
    ('order_id', IntegerType()),
    ('payment_timestamp', TimestampType()),
    ('payment_status', IntegerType()),
    ('payment_method', StringType()),
]
py_payments_schema = StructType(
    [StructField(col_name, col_type) for col_name, col_type in payment_columns]
)

# Configure the CSV reader with the typed schema, then load the payments folder.
py_payments_reader = (
    spark.read
    .format('csv')
    .option('delimiter', ',')
    .schema(py_payments_schema)
)
df = py_payments_reader.load(
    'abfss://gizmobox@maideacourseextdl.dfs.core.windows.net/landing/external_data/payments'
)

# Render the resulting DataFrame in the notebook.
display(df)

## 2. Create payments table in Bronze layer

In [0]:
# Save the DataFrame as the table gizmobox.bronze.py_payments, creating it
# or replacing an existing table of that name.
bronze_writer = df.writeTo('gizmobox.bronze.py_payments')
bronze_writer.createOrReplace()

In [0]:
%sql
-- Return every column and row of gizmobox.bronze.py_payments.
SELECT *
FROM gizmobox.bronze.py_payments;