#INIT

In [0]:
from pyspark.sql.functions import col, when, lit, create_map, coalesce, row_number, to_date
from pyspark.sql.window import Window
from itertools import chain

# Loan `Classification` code -> label stored in the `classification` column.
# Codes missing from this table are labelled "Unknown" by the transformation cells.
classificationMap = dict([
    (1, "NewPawn"),
    (2, "Repawn"),
    (4, "TakeOutPawnBranch"),
    (8, "TakeOutPawnMarketing"),
    (16, "AcquiredPawn"),
    (32, "Restructured"),
])

# Loan `Status` code -> tag applied to rows selected by redemption date.
status_to_tag = dict([
    (2, "L02_Renewed"),
    (4, "L02_Redeemed"),
    (128, "L02_RenewedOnline"),
    (16, "L02_Foreclosed"),
])

# Reporting window as ISO dates; the bronze queries treat both ends as inclusive.
COVERED_DATE = dict(start="2025-01-01", end="2025-01-31")

#Read From Bronze Tables

##Read Loans By LoanDate

In [0]:
# Loans whose LoanDate falls inside COVERED_DATE. The lower bound is inclusive and
# the upper bound is `end + 1 day` (exclusive), so the whole end day is included.
# Each loan is LEFT JOINed to its loan scheme to pick up the scheme name, and
# LoanDate is exposed a second time as `transactiondate`.
# NOTE(review): PawnerTypeId is aliased to `pawntypeid` (without "er") — confirm
# this spelling is what downstream consumers expect.
# NOTE(review): tables here are qualified with the `workspace` catalog, while the
# redemption-date query uses `bronze.<table>` only — confirm both resolve to the
# same tables in the target environment.
query = f"""
    SELECT
        l.Id                             AS id,
        l.FromLoanId                     AS fromloanid,
        l.Classification                 AS classification,
        l.BranchCode                     AS branchcode,
        l.LoanSchemeId                   AS loanschemeid,
        l.PawnTicketNo                   AS pawnticketno,
        l.PawnCategoryId                 AS pawncategoryid,
        l.CustomerId                     AS customerid,
        l.CustomerPresentedIds           AS customerpresentedids,
        l.CustomerPurpose                AS customerpurpose,
        l.Remarks                        AS remarks,
        l.PawnerTypeId                   AS pawntypeid,
        l.SukiCardNo1                    AS sukicardno1,
        l.SukiCardNo2                    AS sukicardno2,
        l.LoanDate                       AS loandate,
        l.RedemptionDate                 AS redemptiondate,
        l.ExpiryDate                     AS expirydate,
        l.MaturityDate                   AS maturitydate,
        l.ForeclosureDate                AS foreclosuredate,
        l.MaxLoanAmount                  AS maxloanamount,
        l.InterestRate                   AS interestrate,
        l.EffectiveInterestRate          AS effectiveinterestrate,
        l.Principal                      AS principal,
        l.Status                         AS status,
        l.NetPay                         AS netpay,
        l.NetProceeds                    AS netproceeds,
        l.LastModified                   AS lastmodified,
        l.AppraiserId                    AS appraiserid,
        l.Username                       AS username,
        l.UserFullname                   AS userfullname,
        l.OnlinePaymentProvider          AS onlinepaymentprovider,
        l.OnlinePaymentReference         AS onlinepaymentreference,
        l.OriginalLoanPeriod             AS originalloanperiod,
        l.OriginalLoanDate               AS originalloandate,
        l.CreatedOn                      AS createdon,
        ls.Name                          AS loanschemename,
        l.LoanDate                       AS transactiondate,
        l.EtlAddOn                       AS etladdon
    FROM workspace.bronze.bronze_loans l
    LEFT JOIN workspace.bronze.bronze_loanschemes ls
        ON l.LoanSchemeId = ls.Id
    WHERE l.LoanDate >= DATE('{COVERED_DATE['start']}')
    AND l.LoanDate <  DATE('{COVERED_DATE['end']}') + INTERVAL 1 DAY
"""

# DataFrame of loans selected by loan date; transformed further in later cells.
loandate_df = spark.sql(query)

##Read Loans By Redemption Date

In [0]:
# Loans whose RedemptionDate falls inside COVERED_DATE, enriched with the most
# recent status-log timestamp and the loan scheme name.
#   filtered_loans : loans whose RedemptionDate, TRY_CAST to DATE, lies between
#                    `start` (inclusive) and `end + 1 day` (exclusive).
#   ranked_logs    : status logs of those loans, numbered per LoanId from the
#                    newest LastModified (rn = 1) to the oldest.
# The final SELECT keeps every filtered loan (LEFT JOINs), attaches the rn = 1 log
# timestamp as `lsllastmodified`, and exposes RedemptionDate as `transactiondate`.
# NOTE(review): PawnerTypeId is aliased to `pawntypeid` (without "er") — confirm
# this spelling is what downstream consumers expect.
# NOTE(review): tables here are referenced as `bronze.<table>`, while the loan-date
# query uses `workspace.bronze.<table>` — confirm both resolve to the same tables.
query = f"""
    WITH filtered_loans AS (
        SELECT
            l.Id,
            l.FromLoanId,
            l.Classification,
            l.BranchCode,
            l.LoanSchemeId,
            l.PawnTicketNo,
            l.PawnCategoryId,
            l.CustomerId,
            l.CustomerPresentedIds,
            l.CustomerPurpose,
            l.Remarks,
            l.PawnerTypeId,
            l.SukiCardNo1,
            l.SukiCardNo2,
            l.LoanDate,
            l.RedemptionDate,
            l.ExpiryDate,
            l.MaturityDate,
            l.ForeclosureDate,
            l.MaxLoanAmount,
            l.InterestRate,
            l.EffectiveInterestRate,
            l.Principal,
            l.Status,
            l.NetPay,
            l.NetProceeds,
            l.LastModified,
            l.AppraiserId,
            l.Username,
            l.UserFullname,
            l.OnlinePaymentProvider,
            l.OnlinePaymentReference,
            l.OriginalLoanPeriod,
            l.OriginalLoanDate,
            l.CreatedOn,
            l.EtlAddOn
        FROM bronze.bronze_loans l
        WHERE TRY_CAST(l.RedemptionDate AS DATE) >= DATE('{COVERED_DATE["start"]}')
        AND TRY_CAST(l.RedemptionDate AS DATE) < DATE('{COVERED_DATE["end"]}') + INTERVAL 1 DAY
    ),
    ranked_logs AS (
        SELECT
            lsl.LoanId,
            lsl.LastModified,
            ROW_NUMBER() OVER (
                PARTITION BY lsl.LoanId
                ORDER BY lsl.LastModified DESC
            ) AS rn
        FROM bronze.bronze_loanstatuslogs lsl
        INNER JOIN filtered_loans fl
            ON fl.Id = lsl.LoanId
    )

    SELECT
        fl.Id                               AS id,
        fl.FromLoanId                      AS fromloanid,
        fl.Classification                  AS classification,
        fl.BranchCode                      AS branchcode,
        fl.LoanSchemeId                    AS loanschemeid,
        fl.PawnTicketNo                    AS pawnticketno,
        fl.PawnCategoryId                  AS pawncategoryid,
        fl.CustomerId                      AS customerid,
        fl.CustomerPresentedIds            AS customerpresentedids,
        fl.CustomerPurpose                 AS customerpurpose,
        fl.Remarks                         AS remarks,
        fl.PawnerTypeId                    AS pawntypeid,
        fl.SukiCardNo1                     AS sukicardno1,
        fl.SukiCardNo2                     AS sukicardno2,
        fl.LoanDate                        AS loandate,
        fl.RedemptionDate                  AS redemptiondate,
        fl.ExpiryDate                      AS expirydate,
        fl.MaturityDate                    AS maturitydate,
        fl.ForeclosureDate                 AS foreclosuredate,
        fl.MaxLoanAmount                   AS maxloanamount,
        fl.InterestRate                    AS interestrate,
        fl.EffectiveInterestRate           AS effectiveinterestrate,
        fl.Principal                       AS principal,
        fl.Status                          AS status,
        fl.NetPay                          AS netpay,
        fl.NetProceeds                     AS netproceeds,
        fl.LastModified                    AS lastmodified,
        fl.AppraiserId                     AS appraiserid,
        fl.Username                        AS username,
        fl.UserFullname                    AS userfullname,
        fl.OnlinePaymentProvider           AS onlinepaymentprovider,
        fl.OnlinePaymentReference          AS onlinepaymentreference,
        fl.OriginalLoanPeriod              AS originalloanperiod,
        fl.OriginalLoanDate                AS originalloandate,
        fl.CreatedOn                       AS createdon,
        ls.Name                            AS loanschemename,
        fl.RedemptionDate                  AS transactiondate,
        rl.LastModified                    AS lsllastmodified,
        fl.EtlAddOn                        AS etladdon
    FROM filtered_loans fl
    LEFT JOIN ranked_logs rl
        ON fl.Id = rl.LoanId
    AND rl.rn = 1
    LEFT JOIN bronze.bronze_loanschemes ls
        ON fl.LoanSchemeId = ls.Id
"""

# DataFrame of loans selected by redemption date; transformed further in later cells.
redemptiondate_df = spark.sql(query)

#Data Transformations

In [0]:
# Spark map-literal column built from classificationMap. create_map expects its
# arguments as alternating key, value, key, value, ... so each (code, label)
# pair is flattened into two consecutive literals.
classification_map = create_map(
    *[lit(item) for pair in classificationMap.items() for item in pair]
)

##LoanDate DF Data Transformations

###Identifying and Assigning the Pawn Classification

In [0]:
# Replace the classification code with its label ("Unknown" when the code has no
# entry in classification_map) and mark these rows as transaction type 1.
loandate_df = (
    loandate_df
    .withColumn(
        "classification",
        coalesce(classification_map[col("classification")], lit("Unknown")),
    )
    .withColumn("loantranxtype", lit(1))
)

###Assigning the Tag

In [0]:
# Tag each loan-date row by whether it originates from an earlier loan.
# A row without a source loan is a new pawn. "No source loan" is recognised both
# as a real SQL NULL and as the literal text "NULL". The previous check only
# matched the literal text: for a real NULL the comparison itself evaluated to
# NULL, so the row fell through to "L01_FrRenewal".
# The comparison is done on a string cast so a numeric fromloanid column is not
# implicitly cast against the text "NULL".
loandate_df = loandate_df.withColumn(
    "tag",
    when(
        col("fromloanid").isNull() | (col("fromloanid").cast("string") == "NULL"),
        lit("L01_NewPawn"),
    ).otherwise(lit("L01_FrRenewal")),
)

##RedemptionDate DF Data Transformations

###Group the loans and keep the latest row based on Last Modified of Loan Status Logs

In [0]:
# Rank the rows of each loan id from the newest to the oldest status-log timestamp.
window_spec = Window.partitionBy("id").orderBy(col("lsllastmodified").desc())

# Keep only the top-ranked row per loan id; the helper rank column is removed again.
redemptiondate_df = (
    redemptiondate_df
    .withColumn("rn", row_number().over(window_spec))
    .where(col("rn") == 1)
    .drop("rn")
)

###Assigning the Tag column

In [0]:
# Spark map-literal column built from status_to_tag, flattened into the
# alternating key/value argument list that create_map expects.
status_map_tag = create_map(
    *[lit(item) for pair in status_to_tag.items() for item in pair]
)

# Look the status code up in the map; codes without an entry are tagged "Unknown".
# NOTE(review): statuses 32 and 64 get special handling for transactiondate in a
# later cell but have no entry in status_to_tag, so they are tagged "Unknown" —
# confirm that is intended.
redemptiondate_df = redemptiondate_df.withColumn(
    "tag",
    coalesce(status_map_tag[col("status")], lit("Unknown")),
)

### Override The Transaction Date if status is 32 or 64

In [0]:
# For status 32 or 64 the transaction date is taken from the date part of the
# latest status-log timestamp; every other row keeps its existing transactiondate.
redemptiondate_df = redemptiondate_df.withColumn(
    "transactiondate",
    when(
        col("status").isin(32, 64),
        to_date(col("lsllastmodified")),
    ).otherwise(col("transactiondate")),
)

###Identifying and Assigning the Pawn Classification

In [0]:
# Replace the classification code with its label ("Unknown" when the code has no
# entry in classification_map) and mark these rows as transaction type 2.
redemptiondate_df = (
    redemptiondate_df
    .withColumn(
        "classification",
        coalesce(classification_map[col("classification")], lit("Unknown")),
    )
    .withColumn("loantranxtype", lit(2))
)

###Drop lsllastmodified column

In [0]:
# lsllastmodified is a helper column used for de-duplication and the transaction
# date override; loandate_df has no such column, so it is removed before the two
# frames are combined by name.
redemptiondate_df = redemptiondate_df.drop("lsllastmodified")

##Combine LoanDate and RedemptionDate Delta Tables

In [0]:
# Stack the loan-date rows (loantranxtype 1) on top of the redemption-date rows
# (loantranxtype 2), matching columns by name rather than by position.
loans_df = loandate_df.unionByName(redemptiondate_df)

#Write To Silver Loans Database

In [0]:
# Persist the combined loans as the silver table, replacing its current contents
# and allowing the stored schema to be replaced as well.
# NOTE(review): "overwrite" replaces the whole table, while the input only covers
# the COVERED_DATE window — confirm a full replace is intended.
(
    loans_df.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("silver.silver_loans")
)