In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType, ArrayType, LongType
from pyspark.sql.functions import explode

class Invoice:
    """Batch-load invoice JSON files into a Delta table, one row per line item.

    The pipeline has three steps, run in order by :meth:`launcher`:

    1. :meth:`get_raw_data` reads JSON from ``<invoice_dir>/incoming`` using
       the explicit schema built in ``__init__``.
    2. :meth:`apply_transformation` explodes ``InvoiceLineItems`` and flattens
       the nested ``DeliveryAddress`` / line-item structs into top-level
       columns.
    3. :meth:`sink` appends the result to a Delta table.

    NOTE: ``get_raw_data`` uses a name ``spark`` that this class does not
    define; it must exist as a global when the method is called (presumably
    the notebook-provided SparkSession -- confirm for non-notebook use).
    """

    # Columns selected by ``apply_transformation``, in output order. Dotted
    # names pull a field out of a struct column; the resulting column is named
    # after the last path component (e.g. ``DeliveryAddress.City`` -> ``City``).
    FLATTENED_COLUMNS = (
        'InvoiceNumber',
        'CreatedTime',
        'StoreID',
        'PosID',
        'CashierID',
        'CustomerType',
        'CustomerCardNo',
        'TotalAmount',
        'NumberOfItems',
        'PaymentMethod',
        'TaxableAmount',
        'CGST',
        'SGST',
        'CESS',
        'DeliveryType',
        'DeliveryAddress.AddressLine',
        'DeliveryAddress.City',
        'DeliveryAddress.State',
        'DeliveryAddress.PinCode',
        'DeliveryAddress.ContactNumber',
        'InvoiceLineItems.ItemCode',
        'InvoiceLineItems.ItemDescription',
        'InvoiceLineItems.ItemPrice',
        'InvoiceLineItems.ItemQty',
        'InvoiceLineItems.TotalValue',
    )

    def __init__(self, invoice_dir: str = '/FileStore/invoices',
                 table_name: str = 'invoice_table') -> None:
        """Configure the source directory, target table and input schema.

        Args:
            invoice_dir: Base directory; files are read from its ``incoming``
                sub-directory. Defaults to the previously hard-coded path.
            table_name: Table that :meth:`sink` appends to. Defaults to the
                previously hard-coded name.
        """
        self.invoice_dir = invoice_dir
        self.table_name = table_name

        # Nested struct describing where the order is delivered.
        delivery_address = StructType([
            StructField('AddressLine', StringType()),
            StructField('City', StringType()),
            StructField('State', StringType()),
            StructField('PinCode', StringType()),
            StructField('ContactNumber', StringType()),
        ])

        # Shape of one element of the InvoiceLineItems array.
        line_item = StructType([
            StructField('ItemCode', StringType()),
            StructField('ItemDescription', StringType()),
            StructField('ItemPrice', DoubleType()),
            StructField('ItemQty', IntegerType()),
            StructField('TotalValue', DoubleType()),
        ])

        # Explicit schema passed to the JSON reader in get_raw_data().
        self.schema = StructType([
            StructField('InvoiceNumber', StringType()),
            StructField('CreatedTime', LongType()),
            StructField('StoreID', StringType()),
            StructField('PosID', StringType()),
            StructField('CashierID', StringType()),
            StructField('CustomerType', StringType()),
            StructField('CustomerCardNo', StringType()),
            StructField('TotalAmount', DoubleType()),
            StructField('NumberOfItems', IntegerType()),
            StructField('PaymentMethod', StringType()),
            StructField('TaxableAmount', DoubleType()),
            StructField('CGST', DoubleType()),
            StructField('SGST', DoubleType()),
            StructField('CESS', DoubleType()),
            StructField('DeliveryType', StringType()),
            StructField('DeliveryAddress', delivery_address, True),
            StructField('InvoiceLineItems', ArrayType(line_item)),
        ])

    def get_raw_data(self):
        """Return a DataFrame of the JSON files under ``<invoice_dir>/incoming``.

        The read uses ``self.schema`` instead of schema inference. Relies on a
        global ``spark`` being defined at call time.
        """
        reader = spark.read.format('json').schema(self.schema)
        return reader.load(f'{self.invoice_dir}/incoming')

    def apply_transformation(self, df):
        """Return ``df`` flattened to one row per invoice line item.

        ``InvoiceLineItems`` is exploded (one output row per array element),
        then the columns listed in ``FLATTENED_COLUMNS`` are selected.

        NOTE(review): ``explode`` emits no row for a null or empty array, so
        an invoice without line items is absent from the result. Confirm that
        this is intended before relying on invoice-level totals downstream.
        """
        exploded = df.withColumn('InvoiceLineItems', explode('InvoiceLineItems'))
        return exploded.select(*self.FLATTENED_COLUMNS)

    def sink(self, df):
        """Append ``df`` to the Delta table named by ``self.table_name``.

        Returns whatever ``saveAsTable`` returns, as the original did.
        """
        writer = df.write.format('delta').mode('append')
        return writer.saveAsTable(self.table_name)

    def launcher(self) -> None:
        """Run read -> transform -> append once, printing start/finish markers.

        Every call re-reads the whole ``incoming`` directory and appends; this
        class does not track which files were already loaded.
        """
        # flush so the start marker is visible while the load is running,
        # even though it is not newline-terminated.
        print('Invoice Batch load started...', end='', flush=True)
        raw_df = self.get_raw_data()
        trans_df = self.apply_transformation(raw_df)
        self.sink(trans_df)
        print('Done.')
    