In [15]:
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType,BooleanType,DateType,FloatType,StringType,DoubleType,DecimalType
from pyspark.sql.functions import * 

from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext

# Reuse the SparkContext if one already exists in this session, otherwise create it.
sc = SparkContext.getOrCreate()
# Wrap the SparkContext in a GlueContext; a later cell uses `gc` to read a catalog table.
# NOTE(review): `gc` is also the name of Python's garbage-collector module, so this
# binding would shadow it if `import gc` were ever added to the notebook.
gc = GlueContext(sc)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [16]:
# Create the working DataFrame (the one containing null values) from the
# "invoices_null_values" table of the "db_null_values" catalog database.
source_df = gc.create_data_frame_from_catalog(
    database="db_null_values",
    table_name="invoices_null_values",
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [3]:
# Preview up to 32 rows of the freshly loaded data before any transformation.
source_df.show(32)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------------+------+--------------------+-----------+--------+-------------+------------+--------+----------+
|external reference|number|        counterparty| amount due|currency|cost currency|costing rate|due date|issue date|
+------------------+------+--------------------+-----------+--------+-------------+------------+--------+----------+
|          SC25505A|137113|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|      15.862|  24-Jan|     3-Jan|
|          SC25506A|137114|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|      15.862|  24-Jan|     3-Jan|
|          SC25503A|137116|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|      15.862|  24-Jan|     3-Jan|
|          SC25504A|137117|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|      15.862|  24-Jan|     3-Jan|
|          SC25499A|137078|   Fruit Traders LLC| 10,134.00 |     USD|          ZAR|      15.862|  24-Jan|     3-Jan|
|          SC25500A|137079|   Fruit Traders LLC|  9,450.00 |    

In [17]:
# SET DECIMAL PLACES
# Both columns are rounded and then rendered with a fixed number of decimals.
# format_number() returns text (with thousands separators), not a numeric type.
costing_rate = "costing rate"
format_costing_rate = F.format_number(F.round(F.col(costing_rate), 4), 4)

# "amount due" arrives as padded text with thousands separators, e.g. " 10,800.00 ".
# Passing that straight to round() produced null because the text cannot be converted
# to a number as-is. Strip commas and whitespace first, then cast explicitly.
# NOTE(review): other non-numeric notations (e.g. "(1,000.00)" for negatives) would
# still become null - confirm that the source never uses them.
amount_due = "amount due"
amount_due_numeric = F.regexp_replace(F.col(amount_due), r"[,\s]", "").cast("double")
format_amount_due = F.format_number(F.round(amount_due_numeric, 2), 2)

# Parenthesised chain instead of a trailing backslash, so a comment or blank line
# after the statement can never be swallowed into it.
source_df = (
    source_df
    .withColumn(costing_rate, format_costing_rate)
    .withColumn(amount_due, format_amount_due)
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [18]:
# Inspect the frame after the decimal formatting step (show() prints 20 rows by default).
source_df.show()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------------+------+--------------------+-----------+--------+-------------+------------+--------+----------+
|external reference|number|        counterparty| amount due|currency|cost currency|costing rate|due date|issue date|
+------------------+------+--------------------+-----------+--------+-------------+------------+--------+----------+
|          SC25505A|137113|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|  24-Jan|     3-Jan|
|          SC25506A|137114|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|  24-Jan|     3-Jan|
|          SC25503A|137116|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|  24-Jan|     3-Jan|
|          SC25504A|137117|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|  24-Jan|     3-Jan|
|          SC25499A|137078|   Fruit Traders LLC| 10,134.00 |     USD|          ZAR|     15.8620|  24-Jan|     3-Jan|
|          SC25500A|137079|   Fruit Traders LLC|  9,450.00 |    

In [19]:
# ADD YEAR TO DATE COLUMNS
# The source dates carry only day and month (e.g. "24-Jan"); append "-2022" so that
# they become complete date strings such as "24-Jan-2022".
# NOTE(review): the year is hard-coded - confirm every invoice belongs to 2022.
source_df = (
    source_df
    .withColumn("due date", F.concat(F.col("due date"), F.lit("-2022")))
    .withColumn("issue date", F.concat(F.col("issue date"), F.lit("-2022")))
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [20]:
# Inspect the frame after the year suffix was appended to both date columns.
source_df.show()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------------+------+--------------------+-----------+--------+-------------+------------+-----------+----------+
|external reference|number|        counterparty| amount due|currency|cost currency|costing rate|   due date|issue date|
+------------------+------+--------------------+-----------+--------+-------------+------------+-----------+----------+
|          SC25505A|137113|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|24-Jan-2022|3-Jan-2022|
|          SC25506A|137114|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|24-Jan-2022|3-Jan-2022|
|          SC25503A|137116|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|24-Jan-2022|3-Jan-2022|
|          SC25504A|137117|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|24-Jan-2022|3-Jan-2022|
|          SC25499A|137078|   Fruit Traders LLC| 10,134.00 |     USD|          ZAR|     15.8620|24-Jan-2022|3-Jan-2022|
|          SC25500A|137079|   Fruit Trad

In [21]:
# FORMAT DATES
# Keep every existing column and, next to each text date column, add a version
# parsed by to_date() from strings such as "24-Jan-2022".
# NOTE(review): the parsed columns are not aliased, so Spark names them after the
# expression, e.g. to_date(`due date`, 'd-MMM-yyy'). Consider adding .alias(...)
# once it is confirmed that no later cell depends on the generated names.
source_df = source_df.select(
    "external reference",
    "number",
    "counterparty",
    "amount due",
    "currency",
    "cost currency",
    "costing rate",
    "due date",
    F.to_date(F.col("due date"), "d-MMM-yyy"),
    "issue date",
    F.to_date(F.col("issue date"), "d-MMM-yyy"),
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [22]:
# Inspect the frame after the parsed date columns were added.
source_df.show()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+------------------+------+--------------------+-----------+--------+-------------+------------+-----------+--------------------------------+----------+----------------------------------+
|external reference|number|        counterparty| amount due|currency|cost currency|costing rate|   due date|to_date(`due date`, 'd-MMM-yyy')|issue date|to_date(`issue date`, 'd-MMM-yyy')|
+------------------+------+--------------------+-----------+--------+-------------+------------+-----------+--------------------------------+----------+----------------------------------+
|          SC25505A|137113|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|24-Jan-2022|                      2022-01-24|3-Jan-2022|                        2022-01-03|
|          SC25506A|137114|   Fruit Traders LLC| 10,800.00 |     USD|          ZAR|     15.8620|24-Jan-2022|                      2022-01-24|3-Jan-2022|                        2022-01-03|
|          SC25503A|137116|   Fruit Traders LLC| 10,800.00 |