In [0]:
from pyspark.sql.functions import *
from loguru import logger

In [0]:
# Declare the notebook's two text widgets; both default to an empty string,
# so callers are expected to supply real values at run time.
dbutils.widgets.text("Mode", "")            # write mode handed to the DataFrame writer
dbutils.widgets.text("processed_Path", "")  # output location for the parquet files

In [0]:
# Read the widget values once into module-level names used by the write cell.
# NOTE(review): both widgets default to "", so an unset widget reaches the
# writer as an empty string -- confirm the job always passes both values.
mode = dbutils.widgets.get("Mode")
processed_Path = dbutils.widgets.get("processed_Path")

In [0]:
class fact_quality_material_movement:
    """Build the material-movement fact DataFrame from ``cur.mkpf`` and ``cur.mseg``.

    The class keeps no state; ``transform_invoke`` reads both tables, joins
    them and returns the projected DataFrame.
    """

    def __init__(self):
        """Nothing to configure: the instance carries no attributes."""

    def transform_invoke(self):
        """Join ``cur.mkpf`` to ``cur.mseg`` and project the fact columns.

        ``mkpf`` is the left side of a left join on ``MBLNR`` and ``MJAHR``,
        so every ``mkpf`` row is kept and the ``mseg``-sourced columns are
        null where no matching ``mseg`` row exists.

        Only DataFrame transformations are chained here (no write, collect or
        count), so the caller decides when the result is materialised.

        Returns:
            DataFrame: the joined rows with source columns renamed to
            descriptive names, ending with ``SALES_ORDER_NO`` and
            ``SALES_ORDER_ITEM``.
        """
        logger.info("Reading tables from cur database")
        # Alias each side explicitly: every column below is referenced through
        # the "mkpf." / "mseg." qualifier, and an explicit alias guarantees the
        # qualifier resolves instead of relying on the table name surviving
        # the `SELECT *`.
        df_mkpf = sql("SELECT * FROM cur.mkpf").alias("mkpf")
        df_mseg = sql("SELECT * FROM cur.mseg").alias("mseg")

        logger.info("Joining mkpf and mseg on MBLNR and MJAHR")
        join_condition = (
            (col("mkpf.MBLNR") == col("mseg.MBLNR"))
            & (col("mkpf.MJAHR") == col("mseg.MJAHR"))
        )
        df_joined = df_mkpf.join(df_mseg, join_condition, how="left")

        logger.info("Applying transformations and selecting required columns")
        df_transformed = df_joined.select(
            col("mkpf.MBLNR").alias("MATERIAL_DOC_NO"),
            col("mkpf.MJAHR").alias("MATERIAL_DOC_YEAR"),
            col("mkpf.BUDAT").alias("POSTING_DATE"),
            col("mkpf.CPUDT").alias("ENTRY_DATE"),
            # NOTE(review): date_format needs a date/timestamp-compatible
            # input -- confirm CPUTM's type in cur.mkpf yields the intended
            # "HH:mm" string rather than null.
            date_format(col("mkpf.CPUTM"), "HH:mm").alias("ENTRY_TIME"),
            col("mkpf.USNAM").alias("USERNAME"),
            col("mseg.ZEILE").alias("MATERIAL_DOC_ITEM"),
            col("mseg.BWART").alias("MOVEMENT_TYPE"),
            trim(col("mseg.MATNR")).alias("MATERIAL_NO"),
            col("mseg.CHARG").alias("BATCH_ID"),
            col("mseg.MENGE").alias("QUANTITY"),
            col("mseg.DMBTR").alias("AMOUNT_IN_LC"),
            col("mseg.LGORT").alias("SENDER_STORAGE_LOCATION"),
            col("mseg.WERKS").alias("SENDER_PLANT"),
            col("mseg.UMLGO").alias("RECEIVING_STORAGE_LOCATION"),
            col("mseg.UMWRK").alias("RECEIVING_PLANT"),
            trim(col("mseg.KUNNR")).alias("CUSTOMER_ID"),
            col("mseg.FISTL").alias("FUNDS_CENTER"),
            col("mseg.ERFME").alias("UNIT_OF_ENTRY"),
            # NOTE(review): this code previously wrapped the next two columns
            # in when/otherwise "fallback" expressions whose fallback value was
            # the very same source column (MAT_KDAUF / MAT_KDPOS), so the
            # result always equalled the source column. That no-op has been
            # removed; the output is unchanged. If a different primary column
            # was intended (presumably mseg.KDAUF / mseg.KDPOS -- confirm
            # against the cur.mseg schema), reinstate the fallback with that
            # column as the primary value.
            col("mseg.MAT_KDAUF").alias("SALES_ORDER_NO"),
            col("mseg.MAT_KDPOS").alias("SALES_ORDER_ITEM"),
        )

        logger.info("Transformation complete")
        return df_transformed


In [0]:
# Build the fact DataFrame when the notebook is executed as the main program.
# NOTE(review): `final_df` is only bound inside this guard, while the write
# cell that follows uses it unconditionally -- confirm the guard is wanted.
if __name__ == "__main__":
    transformer = fact_quality_material_movement()
    final_df = transformer.transform_invoke()


In [0]:
# Persist the fact DataFrame as parquet at the widget-supplied location,
# using the widget-supplied save mode.
(
    final_df.write
    .format("parquet")
    .mode(mode)
    .save(processed_Path)
)