In [0]:
# Read the environment name from the notebook widget "p_environment_name".
# The cells below use it as the prefix of every fully qualified table name.
v_environment = dbutils.widgets.get("p_environment_name")


### Function: `explode_and_load_rows_safe`
 
### Purpose

Performs a **safe, multi-level array explosion** and **validated load** from a source Delta table into a target Delta table.  

Designed for healthcare-format ingestion pipelines where **data integrity**, **auditability**, and **error quarantining** are critical.
 
---
 
### Main Steps
 
#### 1. **Setup & Initialization**

- Generates a unique `RunID` for traceability.  

- Loads the source table into a DataFrame with a technical `_row_id`.  

- Initializes counters for good/bad records.
 
---
 
#### 2. **Array Explosion (Multi-Level)**

- Supports **multiple nested array columns** via the `explode_columns` list.  

- Each exploded array is **aliased** (`exploded_array_1`, `exploded_array_2`, etc.) for clarity and reusability.  

- Uses `explode_outer` by default (keeps parent rows even when arrays are null).
 
---
 
#### 3. **Field Mappings & Lineage**

- Applies `field_mappings` to map source → target fields.  

- Retains `_row_id` for traceability.  

- Adds audit columns:

  - `DateTimeCreated`, `CreatedByUser`  

  - `DateTimeLastModified`, `ModifiedByUser`
 
---
 
#### 4. **Validation Against Target Schema**

- Reads the target table's schema to determine which fields are **NOT NULL**.  

- Checks for actual null violations in the current run (column-level).  

- Splits the dataset into:

  - `good_df` → valid rows  

  - `bad_df` → rows violating NOT NULL constraints
 
---
 
#### 5. **Compact RawRecord Creation**

- Builds a normalized JSON snapshot (`RawRecord`) per row:

  - Converts empty strings to nulls.  

  - Drops null and empty fields using `map_filter(create_map(...))`.  

- This provides a compact, schema-agnostic view of the original data.
 
---
 
#### 6. **Error Quarantine**

- All violating rows (`bad_df`) are logged to a dedicated **error Delta table**:

  - Includes `RunID`, `ErrorRecordID`, `835UID` (if available), and `RawRecord`.  

  - Captures context: `SourceTable`, `TargetTable`, timestamp, and violation message.  

- Supports linking errors back to the original dataset using `_row_id`.
 
---
 
#### 7. **Data Merge into Target**

- Loads valid rows into the target using a **Delta MERGE**:

  - Keys are defined by `key_mappings`.  

  - Updates existing records and inserts new ones.  

  - Maintains audit column updates.
 
---
 
#### 8. **Robust Exception Handling**

- Catches **all exceptions** gracefully:

  - Logs meaningful metadata (`RunID`, source, target, message).  

  - Writes even a single run-level error to the `error_table`.  

  - Avoids blowing up due to massive DataFrame prints.
 
---
 
### Output Summary

Returns a Python dictionary:

```python
{
  "run_id": <unique_uuid>,

  "good_rows": <count_loaded>,

  "bad_rows": <count_quarantined>

}
```

 

In [0]:
import uuid
from functools import reduce
from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    explode,
    col,
    split,
    lit,
    current_timestamp,
    current_user,
    to_json,
    struct,
    monotonically_increasing_id,
    concat_ws,
    create_map,
    map_filter,
    when,
    sum as _sum,
)

def _quote_ident(name: str) -> str:
    """Backtick-quote *name* for Spark SQL, doubling any embedded backtick."""
    return "`" + name.replace("`", "``") + "`"


def _shape_error_rows(rows_df, run_id, record_id, message, source_table, target_table, uid_col):
    """Project *rows_df* onto the error-table layout, one output row per input row.

    Args:
        rows_df: DataFrame whose rows are to be logged.
        run_id: Identifier of the current run, written to ``RunID``.
        record_id: Column expression producing ``ErrorRecordID``.
        message: Text written to ``ErrorMessage``.
        source_table: Name written to ``SourceTable``.
        target_table: Name written to ``TargetTable``.
        uid_col: Name of the UID column; skipped entirely when falsy.

    Returns:
        DataFrame with columns RunID, ErrorRecordID, [uid_col], ErrorMessage,
        Timestamp, SourceTable, TargetTable, RawRecord.
    """
    uid_exprs = []
    if uid_col:
        # NOTE(review): when the UID is not among the mapped columns a NULL
        # string is written instead — assumes the error table stores the UID
        # as a string; confirm against the table definition.
        uid_value = col(uid_col) if uid_col in rows_df.columns else lit(None).cast("string")
        uid_exprs.append(uid_value.alias(uid_col))

    raw_value = col("RawRecord") if "RawRecord" in rows_df.columns else lit(None).cast("string")

    return rows_df.select(
        lit(run_id).alias("RunID"),
        record_id.alias("ErrorRecordID"),
        *uid_exprs,
        lit(message).alias("ErrorMessage"),
        current_timestamp().alias("Timestamp"),
        lit(source_table).alias("SourceTable"),
        lit(target_table).alias("TargetTable"),
        raw_value.alias("RawRecord"),
    )


def explode_and_load_rows_safe(
    source_table: str,
    target_table: str,
    field_mappings: dict,
    key_mappings: list,
    explode_columns: list = None,
    error_table: str = f"{v_environment}_analytics.db_000000_healthcareformats.loading_error_records",
    uid_col: str = "835UID",
    use_explode_outer: bool = True,
):
    """Explode nested arrays from a source table, validate, and MERGE into a target.

    Steps:
      1. Read ``source_table`` and explode each entry of ``explode_columns`` in
         order into ``exploded_array_1``, ``exploded_array_2``, ...
      2. Project the columns described by ``field_mappings`` and add the audit
         columns (DateTimeCreated, CreatedByUser, DateTimeLastModified,
         ModifiedByUser).
      3. Build a compact JSON ``RawRecord`` per output row (values rendered as
         strings; NULLs and empty strings dropped).
      4. Split rows into valid / violating using the NOT NULL columns reported
         by the target table's schema.
      5. Append violating rows to ``error_table``.
      6. MERGE valid rows into ``target_table`` on ``key_mappings``.

    Any exception is caught, printed, and recorded as ONE run-level row in
    ``error_table``; the function never raises.

    Args:
        source_table: Fully qualified name of the table to read.
        target_table: Fully qualified name of the Delta table to MERGE into.
        field_mappings: ``{target_column: source}`` where ``source`` is either a
            column path string or a Column expression. Dict order determines
            the column order of the projection and of the INSERT list.
        key_mappings: Target column names used in the MERGE ``ON`` condition.
            Must not be empty.
        explode_columns: Array column paths to explode, outermost first. Later
            entries may reference earlier aliases (``exploded_array_1.x``).
        error_table: Delta table that receives quarantined rows and run-level
            errors.
        uid_col: Name of the UID column copied into error rows when present.
        use_explode_outer: Use ``explode_outer`` (keeps rows whose array is
            NULL/empty) instead of ``explode``.

    Returns:
        ``{"run_id": str, "good_rows": int, "bad_rows": int}``. ``good_rows`` is
        only non-zero after the MERGE succeeded and ``bad_rows`` only after the
        quarantine write succeeded.

    Note:
        NOTE(review): the MERGE assumes ``key_mappings`` identifies at most one
        source row per target row; Delta rejects a MERGE where several source
        rows match the same target row — verify key uniqueness per table.
    """
    from pyspark.sql.functions import explode_outer as _explode_outer

    spark = SparkSession.getActiveSession()
    run_id = str(uuid.uuid4())
    print(f"🚀 Starting run {run_id} for table {source_table}")

    # Always-defined counters so both return paths can report them.
    good_count = 0
    bad_count = 0

    audit_cols = ["DateTimeCreated", "CreatedByUser", "DateTimeLastModified", "ModifiedByUser"]

    try:
        if not key_mappings:
            raise ValueError("key_mappings must name at least one column for the MERGE condition")

        # Read inside the try block so a missing/unreadable source is logged too.
        df = spark.table(source_table).withColumn("_row_id", monotonically_increasing_id())

        # 1) Explode with aliases (exploded_array_1, exploded_array_2, ...)
        exploder = _explode_outer if use_explode_outer else explode
        alias_map = {}
        for level, explode_col in enumerate(explode_columns or [], start=1):
            alias = f"exploded_array_{level}"
            alias_map[explode_col] = alias
            df = df.withColumn(alias, exploder(col(explode_col)))

        if alias_map:
            print("🗺️ Explode alias map:")
            for src, al in alias_map.items():
                print(f"   {src} -> {al}")

        # 2) Apply field mappings (+ carry the pre-explosion lineage id)
        target_cols = list(field_mappings)
        selected_cols = [
            (col(src_expr) if isinstance(src_expr, str) else src_expr).alias(tgt_col)
            for tgt_col, src_expr in field_mappings.items()
        ]
        flattened_df = df.select(*selected_cols, col("_row_id"))

        # 3) Compact RawRecord, computed on the SAME row it describes.
        #    _row_id is assigned before the explode, so exploded siblings share
        #    it; joining a snapshot back on _row_id would fan out and attach the
        #    wrong RawRecord. Values are cast to string so that mixed column
        #    types can live in one map and the "" comparison is type-safe.
        raw_entries = []
        for c in target_cols:
            as_text = col(c).cast("string")
            raw_entries += [lit(c), when(as_text == "", None).otherwise(as_text)]
        raw_record = to_json(
            map_filter(create_map(*raw_entries), lambda _, v: v.isNotNull())
        )

        # 4) Audit columns
        enriched_df = (
            flattened_df.withColumn("RawRecord", raw_record)
                        .withColumn("DateTimeCreated", current_timestamp())
                        .withColumn("CreatedByUser", current_user())
                        .withColumn("DateTimeLastModified", current_timestamp())
                        .withColumn("ModifiedByUser", current_user())
        )

        # 5) NOT NULL columns according to the target schema
        target_not_null = {
            f.name for f in spark.table(target_table).schema.fields if not f.nullable
        }
        insert_cols = target_cols + audit_cols
        required_not_null = [c for c in insert_cols if c in target_not_null]

        # 6) Find the columns that actually contain NULLs, in one pass over the
        #    data (positional aliases avoid quoting issues with column names).
        violating_cols = []
        if required_not_null:
            null_counts = enriched_df.agg(
                *[
                    _sum(col(c).isNull().cast("int")).alias(f"n{i}")
                    for i, c in enumerate(required_not_null)
                ]
            ).collect()[0]
            violating_cols = [
                c for i, c in enumerate(required_not_null) if null_counts[f"n{i}"]
            ]

        if violating_cols:
            has_violation = reduce(
                lambda a, b: a | b, [col(c).isNull() for c in violating_cols]
            )
            bad_df = enriched_df.where(has_violation)
            good_df = enriched_df.where(~has_violation)
        else:
            bad_df = None
            good_df = enriched_df

        # 7) Quarantine violators
        if bad_df is not None:
            violation_count = bad_df.count()
            if violation_count > 0:
                quarantine_df = _shape_error_rows(
                    bad_df,
                    run_id,
                    concat_ws("_", lit(run_id), monotonically_increasing_id().cast("string")),
                    "NOT NULL constraint violation on: " + ", ".join(violating_cols),
                    source_table,
                    target_table,
                    uid_col,
                )
                quarantine_df.write.mode("append").format("delta").saveAsTable(error_table)
                # Only report rows as quarantined once the write has succeeded.
                bad_count = violation_count
                print(f"⚠️ Quarantined {bad_count} rows → {error_table}")

        # 8) Merge valid rows (technical columns are not part of the target)
        good_df = good_df.drop("_row_id", "RawRecord")
        good_df.createOrReplaceTempView("exploded_view")
        valid_count = good_df.count()

        if valid_count > 0:
            on_clause = " AND ".join(
                f"tgt.{_quote_ident(k)} = src.{_quote_ident(k)}" for k in key_mappings
            )
            set_clause = ", ".join(
                f"tgt.{_quote_ident(c)} = src.{_quote_ident(c)}"
                for c in target_cols + ["DateTimeLastModified", "ModifiedByUser"]
            )
            insert_list = ", ".join(_quote_ident(c) for c in insert_cols)
            values_list = ", ".join(f"src.{_quote_ident(c)}" for c in insert_cols)
            merge_sql = f"""
                MERGE INTO {target_table} AS tgt
                USING exploded_view AS src
                ON {on_clause}
                WHEN MATCHED THEN UPDATE SET {set_clause}
                WHEN NOT MATCHED THEN INSERT ({insert_list})
                VALUES ({values_list})
            """
            spark.sql(merge_sql)
            # Only report rows as loaded once the MERGE has succeeded.
            good_count = valid_count
            print(f"✅ Loaded {good_count} valid rows into {target_table}")
        else:
            print("⚠️ No valid rows to load after validation.")

        if bad_count > 0:
            print(f"⚠️ {bad_count} rows skipped (see {error_table})")

        print(f"🧾 RunID: {run_id}")
        return {"run_id": run_id, "good_rows": good_count, "bad_rows": bad_count}

    except Exception as e:
        # ---- Robust exception path: exactly ONE run-level row is logged ----
        # Row-level violations (if any) were already quarantined in step 7, so
        # re-logging them here would duplicate them under an unrelated message.
        error_message = str(e).replace("'", "")
        print(f"❌ Error during load: {error_message}")

        try:
            # Built from spark.range(1) with typed literals: inferring a schema
            # from a Row whose fields are all None fails in createDataFrame.
            error_df = _shape_error_rows(
                spark.range(1),
                run_id,
                lit(f"{run_id}_0"),
                error_message,
                source_table,
                target_table,
                uid_col,
            )
            error_df.write.mode("append").format("delta").saveAsTable(error_table)
            print(f"⚠️ Logged error rows to {error_table}")
        except Exception as log_err:
            print(f"⚠️ Failed to write to error table during exception handling: {log_err}")

        print(f"🧾 RunID: {run_id}")
        return {"run_id": run_id, "good_rows": good_count, "bad_rows": bad_count}




In [0]:
# Load the `payer.*` fields of the remittance table into t835_l1000a_payer.
# No array is exploded; rows are merged on 835UID + EntityIdentifierCode.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"

# (target column, leaf name under `payer`) — order defines the column order.
_payer_leaves = (
    ("EntityIdentifierCode", "entity_identifier_code"),
    ("PayerName", "payer_name"),
    ("IDCodeQualifier", "id_code_qualifier"),
    ("PayerIdentifier", "payer_identifier"),
    ("EntityRelationshipCode", "entity_relationship_code"),
    ("PayerAddressLine1", "payer_address_line_1"),
    ("PayerAddressLine2", "payer_address_line_2"),
    ("PayerCityName", "payer_city_name"),
    ("PayerStateCode", "payer_state_code"),
    ("PayerPostalZoneorZIPCode", "payer_postal_zone_or_zip_code"),
    ("CountryCode", "country_code"),
    ("LocationQualifier", "location_qualifier"),
    ("CountrySubdivisionCode", "country_subdivision_code"),
)
_payer_mappings = {"835UID": "835UID"}
_payer_mappings.update(
    (_target, f"payer.{_leaf}") for _target, _leaf in _payer_leaves
)

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l1000a_payer",
    field_mappings=_payer_mappings,
    key_mappings=["835UID", "EntityIdentifierCode"],
    explode_columns=[],
)


In [0]:
# Explode `payer.payer_contact_info` (→ exploded_array_1) and load one row per
# contact entry into t835_l1000a_per_payercontacts.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"
_contact = "exploded_array_1"

_contact_mappings = {
    "835UID": "835UID",
    "EntityIdentifierCode": "payer.entity_identifier_code",
    "ContactFunctionCode": f"{_contact}.contact_function_cd",
    "PayerContactName": f"{_contact}.contact_name",
    "CommunicationNumberQualifier": f"{_contact}.communication_number_qualifier1",
    "PayerContactCommunication1": f"{_contact}.contact_communication1",
    "CommunicationNumberQualifier2": f"{_contact}.communication_number_qualifier2",
    "PayerContactCommunication2": f"{_contact}.contact_communication2",
    "CommunicationNumberQualifier3": f"{_contact}.communication_number_qualifier3",
    "PayerContactCommunication3": f"{_contact}.contact_communication3",
    "ContactInquiryReference": f"{_contact}.contact_inquiry_reference",
}

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l1000a_per_payercontacts",
    field_mappings=_contact_mappings,
    key_mappings=["835UID", "EntityIdentifierCode", "ContactFunctionCode"],
    explode_columns=["payer.payer_contact_info"],
)



In [0]:
# Explode `payer.payer_additional_identification` (→ exploded_array_1) and load
# one row per entry into t835_l1000a_ref_payeradditionalidentification.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"
_payer_ref = "exploded_array_1"

_payer_ref_mappings = {
    "835UID": "835UID",
    "EntityIdentifierCode": "payer.entity_identifier_code",
    "ReferenceIdentificationQualifier": f"{_payer_ref}.id_qualifier_code",
    "AdditionalPayerID": f"{_payer_ref}.id",
    "Description": f"{_payer_ref}.description",
}

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l1000a_ref_payeradditionalidentification",
    field_mappings=_payer_ref_mappings,
    key_mappings=["835UID", "EntityIdentifierCode", "ReferenceIdentificationQualifier"],
    explode_columns=["payer.payer_additional_identification"],
)


In [0]:
# Load the `payee.*` fields of the remittance table into t835_l1000b_payee.
# explode_columns is left at its default; rows merge on 835UID + EntityIdentifierCode.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"

# (target column, leaf name under `payee`) — order defines the column order.
_payee_leaves = (
    ("EntityIdentifierCode", "entity_identifier_code"),
    ("PayeeName", "payee_name"),
    ("IdentificationCodeQualifier", "id_code_qualifier"),
    ("PayeeIDCode", "payee_identifier"),
    ("EntityRelationshipCode", "entity_relationship_code"),
    ("PayeeAddressLine1", "payee_address_line_1"),
    ("PayeeAddressLine2", "payee_address_line_2"),
    ("PayeeCityName", "payee_city_name"),
    ("PayeeStateCode", "payee_state_code"),
    ("PayeePostalZoneorZIPCode", "payee_postal_zone_or_zip_code"),
    ("CountryCode", "country_code"),
    ("LocationQualifier", "location_qualifier"),
    ("CountrySubdivisionCode", "country_subdivision_code"),
)
_payee_mappings = {"835UID": "835UID"}
_payee_mappings.update(
    (_target, f"payee.{_leaf}") for _target, _leaf in _payee_leaves
)

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l1000b_payee",
    field_mappings=_payee_mappings,
    key_mappings=["835UID", "EntityIdentifierCode"],
)



In [0]:
# Load the `payee.delivery_*` fields into t835_l1000b_rdm_payeedeliverymethod.
# Nothing is exploded (explode_columns is passed as None explicitly).
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"

_delivery_mappings = {
    "835UID": "835UID",
    "EntityIdentifierCode": "payee.entity_identifier_code",
}
_delivery_mappings.update(
    (_target, f"payee.delivery_{_leaf}")
    for _target, _leaf in (
        ("ReportTransmissionCode", "report_transmission_code"),
        ("Name", "name"),
        ("CommunicationNumber", "communication_number"),
        ("ReferenceIdentifier", "reference_identifier"),
    )
)

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l1000b_rdm_payeedeliverymethod",
    field_mappings=_delivery_mappings,
    key_mappings=["835UID", "EntityIdentifierCode", "ReportTransmissionCode"],
    explode_columns=None,
)


In [0]:
# Explode `payee.payee_additional_identification` (→ exploded_array_1) and load
# one row per entry into t835_l1000b_ref_payeeadditionalidentifications.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"
_payee_ref = "exploded_array_1"

_payee_ref_mappings = {
    "835UID": "835UID",
    "EntityIdentifierCode": "payee.entity_identifier_code",
    "ReferenceIdentificationQualifier": f"{_payee_ref}.id_qualifier_code",
    "AdditionalPayeeIDNumber": f"{_payee_ref}.id",
    "Description": f"{_payee_ref}.description",
}

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l1000b_ref_payeeadditionalidentifications",
    field_mappings=_payee_ref_mappings,
    key_mappings=["835UID", "EntityIdentifierCode", "ReferenceIdentificationQualifier"],
    explode_columns=["payee.payee_additional_identification"],
)


In [0]:
from pyspark.sql.functions import col, to_timestamp, lit

# Load the `header_info` summary fields (ts3 and ts2 groups) into
# t835_l2000_summary, one row per remittance, merged on 835UID.
# Every count/amount is cast to decimal(19,2); FiscalPeriodDate is parsed from
# a yyyyMMdd string.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2000_summary",
    field_mappings={
        "835UID": "835UID",
        "AssignedNumber": "header_info.assigned_num",
        "ProviderIdentifier": "header_info.ts3.provider_identifier",
        "FacilityCodeValue": "header_info.ts3.facility_code_value",
        "FiscalPeriodDate": to_timestamp("header_info.ts3.fiscal_period_date", "yyyyMMdd"),
        "TotalClaimCount": col("header_info.ts3.total_claim_count").cast("decimal(19,2)"),
        "TotalClaimChangeAmount": col("header_info.ts3.total_claim_change_amount").cast("decimal(19,2)"),
        "TotalCoveredChargeAmount": col("header_info.ts3.total_covered_charge_amount").cast("decimal(19,2)"),
        "TotalNoncoveredChargeAmount": col("header_info.ts3.total_noncovered_charge_amount").cast("decimal(19,2)"),
        "TotalDeniedChargeAmount": col("header_info.ts3.total_denied_charge_amount").cast("decimal(19,2)"),
        "TotalProviderAmount": col("header_info.ts3.total_provider_amount").cast("decimal(19,2)"),
        "TotalInterestAmount": col("header_info.ts3.total_interest_amount").cast("decimal(19,2)"),
        "TotalContractualAdjustmentAmount": col("header_info.ts3.total_contractual_adjustment_amount").cast("decimal(19,2)"),
        "TotalGrammRudmanReductionAmount": col("header_info.ts3.total_gramm_rudman_reduction_amount").cast("decimal(19,2)"),
        "TotalMSPPayerAmount": col("header_info.ts3.total_msp_payer_amount").cast("decimal(19,2)"),
        "TotalBloodDeductibleAmount": col("header_info.ts3.total_blood_deductible_amount").cast("decimal(19,2)"),
        "TotalNonLabChargeAmount": col("header_info.ts3.total_non_lab_charge_amount").cast("decimal(19,2)"),
        "TotalCoinsuranceAmount": col("header_info.ts3.total_coinsurance_amount").cast("decimal(19,2)"),
        "TotalHCPCSReportedChargeAmount": col("header_info.ts3.total_hcpcs_reported_charge_amount").cast("decimal(19,2)"),
        "TotalHCPCSPayableAmount": col("header_info.ts3.total_hcpcs_payable_amount").cast("decimal(19,2)"),
        "TotalDeductibleAmount": col("header_info.ts3.total_deductible_amount").cast("decimal(19,2)"),
        "TotalProfessionalComponentAmount": col("header_info.ts3.total_professional_component_amount").cast("decimal(19,2)"),
        "TotalMSPPatientLiabilityMetAmount": col("header_info.ts3.total_msp_patient_liability_met_amount").cast("decimal(19,2)"),
        "TotalPatientReimbursementAmount": col("header_info.ts3.total_patient_reimbursement_amount").cast("decimal(19,2)"),
        "TotalPIPClaimCount": col("header_info.ts3.total_pip_claim_count").cast("decimal(19,2)"),
        "TotalPIPAdjustmentAmount": col("header_info.ts3.total_pip_adjustment_amount").cast("decimal(19,2)"),
        "TotalDRGAmount": col("header_info.ts2.total_drg_amount").cast("decimal(19,2)"),
        # Cast like every sibling amount; it was previously passed through as a
        # raw string. NOTE(review): assumes the target column is decimal(19,2)
        # like the other Total*Amount columns — confirm against the table DDL.
        "TotalFederalSpecificAmount": col("header_info.ts2.total_federal_specific_amount").cast("decimal(19,2)"),
        "TotalHospitalSpecifcAmount": col("header_info.ts2.total_hospital_specific_amount").cast("decimal(19,2)"),
        "TotalDisproportionateAmount": col("header_info.ts2.total_disproportionate_amount").cast("decimal(19,2)"),
        "TotalCapitalAmount": col("header_info.ts2.total_capital_amount").cast("decimal(19,2)"),
        "TotalIndirectMedicalEducationAmount": col("header_info.ts2.total_indirect_medical_education_amount").cast("decimal(19,2)"),
        "TotalOutlierDayCount": col("header_info.ts2.total_outlier_day_count").cast("decimal(19,2)"),
        "TotalDayOutlierAmount": col("header_info.ts2.total_day_outlier_amount").cast("decimal(19,2)"),
        "TotalCostOutlierAmount": col("header_info.ts2.total_cost_outlier_amount").cast("decimal(19,2)"),
        "AverageDRGLengthofStay": col("header_info.ts2.average_drg_length_of_stay").cast("decimal(19,2)"),
        "TotalDischargeCount": col("header_info.ts2.total_discharge_count").cast("decimal(19,2)"),
        "TotalCostReportDayCount": col("header_info.ts2.total_cost_report_day_count").cast("decimal(19,2)"),
        "TotalCoveredDayCount": col("header_info.ts2.total_covered_day_count").cast("decimal(19,2)"),
        "TotalNoncoveredDayCount": col("header_info.ts2.total_noncovered_day_count").cast("decimal(19,2)"),
        "TotalMSPPassThroughAmount": col("header_info.ts2.total_msp_pass_through_amount").cast("decimal(19,2)"),
        "AverageDRGWeight": col("header_info.ts2.average_drg_weight").cast("decimal(19,2)"),
        "TotalPPSCapitalFSPDRGAmount": col("header_info.ts2.total_pps_capital_fsp_drg_amount").cast("decimal(19,2)"),
        "TotalPSPCapitalHSPDRGAmount": col("header_info.ts2.total_psp_capital_hsp_drg_amount").cast("decimal(19,2)"),
        "TotalPPSDSHDRGAmount": col("header_info.ts2.total_pps_dsh_drg_amount").cast("decimal(19,2)")
    },
    key_mappings=["835UID"],
    explode_columns=None  # or omit
)


In [0]:
from pyspark.sql.functions import col, to_timestamp

# Two-level explode: `claims` → exploded_array_1, then that claim's
# `claim_supplemental_amount` → exploded_array_2. One row per supplemental
# amount is loaded into t835_l2100_amt_claimsuplementalamount.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"
_claim, _amt = "exploded_array_1", "exploded_array_2"

_amt_mappings = {
    "835UID": "835UID",
    "PatientControlNumber": f"{_claim}.clp.patient_control_number",
    "AmountQualifierCode": f"{_amt}.amount_qualifier_code",
    "ClaimSupplementalInformationAmt": col(f"{_amt}.amt").cast("decimal(19,2)"),
    "CreditDebitFlagCode": f"{_amt}.credit_debit_flag_code",
}

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l2100_amt_claimsuplementalamount",
    field_mappings=_amt_mappings,
    key_mappings=["835UID", "PatientControlNumber", "AmountQualifierCode"],
    explode_columns=["claims", f"{_claim}.claim_supplemental_amount"],
)


In [0]:
from pyspark.sql.functions import col

# Two-level explode: `claims` → exploded_array_1, then that claim's
# `claim_supplemental_quantity` → exploded_array_2. One row per supplemental
# quantity is loaded into t835_l2100_qty_claimsuplementalquantity.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"
_claim, _qty = "exploded_array_1", "exploded_array_2"

_qty_mappings = {
    "835UID": "835UID",
    "PatientControlNumber": f"{_claim}.clp.patient_control_number",
    "QuantityQualifier": f"{_qty}.quantity_qualifier_code",
    "ClaimSupplementalInformationQuantity": col(f"{_qty}.qty").cast("float"),
    "CompositeUnitofMeasure": f"{_qty}.composite_unit_of_measure",
}

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l2100_qty_claimsuplementalquantity",
    field_mappings=_qty_mappings,
    key_mappings=["835UID", "PatientControlNumber", "QuantityQualifier"],
    explode_columns=["claims", f"{_claim}.claim_supplemental_quantity"],
)


In [0]:
from pyspark.sql.functions import col

# Two-level explode: `claims` → exploded_array_1, then that claim's
# `claim_adjustments` → exploded_array_2. Each adjustment entry carries six
# numbered reason/amount/quantity triplets, generated below in index order.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"
_claim, _adj = "exploded_array_1", "exploded_array_2"

_cas_mappings = {
    "835UID": "835UID",
    "PatientControlNumber": f"{_claim}.clp.patient_control_number",
    "ClaimAdjustmentGroupCode": f"{_adj}.adjustment_grp_cd",
}
for _slot in range(1, 7):
    # Insertion order per slot: reason code, amount, quantity.
    _cas_mappings[f"AdjustmentReasonCode{_slot}"] = f"{_adj}.adjustment_reason_cd_{_slot}"
    _cas_mappings[f"AdjustmentAmount{_slot}"] = (
        col(f"{_adj}.adjustment_amount_{_slot}").cast("decimal(19,2)")
    )
    _cas_mappings[f"AdjustmentQuantity{_slot}"] = (
        col(f"{_adj}.adjustment_quantity_{_slot}").cast("decimal(19,2)")
    )

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l2100_cas_claimadjustments",
    field_mappings=_cas_mappings,
    key_mappings=["835UID", "PatientControlNumber", "ClaimAdjustmentGroupCode"],
    explode_columns=["claims", f"{_claim}.claim_adjustments"],
)


In [0]:
from pyspark.sql.functions import col

# Explode `claims` (→ exploded_array_1) and load one row per claim into
# t835_l2100_claim. Fields come from the claim's `clp`, `mia` and `moa`
# structs; rows are merged on 835UID + PatientControlNumber.
_hcf_db = f"{v_environment}_analytics.db_000000_healthcareformats"
_clp = "exploded_array_1.clp."
_mia = "exploded_array_1.mia."
_moa = "exploded_array_1.moa."


def _dec2(path):
    """Return the column at *path* cast to decimal(19,2)."""
    return col(path).cast("decimal(19,2)")


_claim_mappings = {
    "835UID": "835UID",

    # CLP
    "PatientControlNumber": _clp + "patient_control_number",
    "ClaimStatusCode": _clp + "claim_status_code",
    "TotalClaimChargeAmount": _dec2(_clp + "total_claim_charge_amount"),
    "ClaimPaymentAmount": _dec2(_clp + "claim_payment_amount"),
    "PatientResponsibilityAmount": _dec2(_clp + "patient_responsibility_amount"),
    "ClaimFillingIndicatorCode": _clp + "claim_filing_indicator_code",
    "PayerClaimControlNumber": _clp + "payer_claim_control_number",
    "FacilityCodeValue": _clp + "facility_code_value",
    "ClaimFrequencyCode": _clp + "claim_frequency_code",
    "PatientStatusCode": _clp + "patient_status_code",
    "DRGCode": _clp + "drg_code",
    "DRGWeight": _dec2(_clp + "drg_weight"),
    "DischargeFraction": col(_clp + "discharge_fraction").cast("float"),
    "YesNoConditionorResponseCode": _clp + "yes_no_condition_or_response_code",

    # MIA
    "CoveredDayOrVisitsCount": _dec2(_mia + "covered_days_or_visits_count"),
    "PPSOperationOutlierAmount": _dec2(_mia + "pps_operation_outlier_amount"),
    "LifetimePsychiatricDaysCount": _dec2(_mia + "lifetime_psychiatric_days_count"),
    "ClaimDRGAmount": _dec2(_mia + "claim_drg_amount"),
    "ClaimPaymentRemarkCode": _mia + "claim_payment_remark_code",
    "ClaimDSHAmount": _dec2(_mia + "claim_dsh_amount"),
    "ClaimMSPPassThruAmount": _dec2(_mia + "claim_msp_pass_thru_amount"),
    "ClaimPPSCapitalAmount": _dec2(_mia + "claim_pps_capital_amount"),
    "PPSCapitalFSPDRGAmount": _dec2(_mia + "pps_capital_fsp_drg_amount"),
    "PPSCapitalHSPDRGAmount": _dec2(_mia + "pps_capital_hsp_drg_amount"),
    "PPSCapitalDSHDRGAmount": _dec2(_mia + "pps_capital_dsh_drg_amount"),
    "OldCapitalAmount": _dec2(_mia + "old_capital_amount"),
    "PPSCapitalIMEAmount": _dec2(_mia + "pps_capital_ime_amount"),
    "PPSOperHSPSpecDRGAmount": _dec2(_mia + "pps_oper_hsp_spec_drg_amount"),
    "CostReportDaycCunt": _dec2(_mia + "cost_report_day_count"),
    "PPSOperFSPSpecDRGAmount": _dec2(_mia + "pps_oper_fsp_spec_drg_amount"),
    "ClaimPPSOutlierAmount": _dec2(_mia + "claim_pps_outlier_amount"),
    "ClaimIndirectTeaching": _dec2(_mia + "claim_indirect_teaching"),
    "NonPayProfCompAmount": _dec2(_mia + "non_pay_prof_comp_amount"),
    **{
        f"InPatientClaimPaymentRemarkCode{_k}": f"{_mia}inpatient_claim_payment_remark_code_{_k}"
        for _k in range(1, 5)
    },
    "PPSCapitalExceptionAmount": _dec2(_mia + "pps_capital_exception_amount"),

    # MOA
    "ReimbursementRate": col(_moa + "reimbursement_rate").cast("float"),
    "ClaimHCPCSPayableAmount": _dec2(_moa + "claim_hcpcs_payable_amount"),
    **{
        f"OutPatientClaimPaymentRemarkCode{_k}": f"{_moa}outpatient_claim_payment_remark_code_{_k}"
        for _k in range(1, 6)
    },
    "ClaimESRDPaymentAmount": _dec2(_moa + "claim_esrd_payment_amount"),
    "NonPayableProfessionalCompAmount": _dec2(_moa + "non_payable_professional_comp_amount"),
}

explode_and_load_rows_safe(
    source_table=f"{_hcf_db}.remittance",
    target_table=f"{_hcf_db}.t835_l2100_claim",
    field_mappings=_claim_mappings,
    key_mappings=["835UID", "PatientControlNumber"],
    explode_columns=["claims"],
)


In [0]:
from pyspark.sql.functions import col, to_timestamp

# Claim dates: explode `claims` (-> exploded_array_1), then each claim's
# `claim_dates` (-> exploded_array_2), and load t835_l2100_dtm_claimdates.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2100_dtm_claimdates",
    explode_columns=["claims", "exploded_array_1.claim_dates"],
    key_mappings=["835UID", "PatientControlNumber", "DateTimeQualifier"],
    field_mappings={
        # Read from the un-exploded row and from the exploded claim.
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # Read from the exploded claim_dates element.
        "DateTimeQualifier": "exploded_array_2.date_code",
        "DateValue": to_timestamp(col("exploded_array_2.date"), "yyyyMMdd"),
        # Uppercase HH: the hour is parsed on a 24-hour clock.
        "TimeValue": to_timestamp(col("exploded_array_2.time"), "HHmmss"),
    },
)


In [0]:
# Claim names: explode `claims` (-> exploded_array_1), then each claim's
# `claim_names` (-> exploded_array_2), and load t835_l2100_nm1_claimnames.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2100_nm1_claimnames",
    explode_columns=["claims", "exploded_array_1.claim_names"],
    key_mappings=["835UID", "PatientControlNumber", "EntityIdentifierCode"],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # Every remaining target reads a field of the exploded claim_names
        # element, so the shared "exploded_array_2." prefix is added here.
        **{
            target: f"exploded_array_2.{field}"
            for target, field in (
                ("EntityIdentifierCode", "entity_identifier_code"),
                ("EntityTypeQualifier", "entity_type_qualifier"),
                ("LastNameOrOrganization", "last_name_or_organization"),
                ("FirstName", "first_name"),
                ("MiddleName", "middle_name"),
                ("NamePrefix", "name_prefix"),
                ("NameSuffix", "name_suffix"),
                ("IDCodeQualifier", "id_code_qualifier"),
                ("Identifier", "identifier"),
                ("EntityRelationshipCode", "entity_relationship_code"),
            )
        },
    },
)


In [0]:
# Claim contacts: explode `claims` (-> exploded_array_1), then each claim's
# `claim_contacts` (-> exploded_array_2), and load t835_l2100_per_claimcontacts.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2100_per_claimcontacts",
    explode_columns=["claims", "exploded_array_1.claim_contacts"],
    key_mappings=["835UID", "PatientControlNumber", "ContactFunctionCode"],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # Every remaining target reads a field of the exploded claim_contacts
        # element, so the shared "exploded_array_2." prefix is added here.
        **{
            target: f"exploded_array_2.{field}"
            for target, field in (
                ("ContactFunctionCode", "contact_function_cd"),
                ("ClaimContactName", "contact_name"),
                ("CommunicationNumberQualifier1", "communication_number_qualifier1"),
                ("ClaimContactCommunication1", "contact_communication1"),
                ("CommunicationNumberQualifier2", "communication_number_qualifier2"),
                ("ClaimContactCommunication2", "contact_communication2"),
                ("CommunicationNumberQualifier3", "communication_number_qualifier3"),
                # The third communication value feeds the "extension" column.
                ("CommunicationNumberExtension", "contact_communication3"),
                ("ContactInquiryReference", "contact_inquiry_reference"),
            )
        },
    },
)


In [0]:
# Claim related identifications: explode `claims` (-> exploded_array_1), then
# each claim's `claim_related_identifications` (-> exploded_array_2).
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2100_ref_claimrelatedidentifications",
    explode_columns=["claims", "exploded_array_1.claim_related_identifications"],
    key_mappings=["835UID", "PatientControlNumber", "ReferenceIDQualifier"],
    field_mappings={
        # Read from the un-exploded row and from the exploded claim.
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # Read from the exploded identification element.
        "ReferenceIDQualifier": "exploded_array_2.id_qualifier_code",
        "OtherClaimRelatedID": "exploded_array_2.id",
    },
)


In [0]:
# This cell uses both `col` and `split`; import them here so the cell can be
# run on its own instead of relying on an import made by some other cell.
from pyspark.sql.functions import col, split

# Claim-line supplemental amounts: three-level explosion
# claims -> claim_lines -> claim_line_supplemental_amount.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2110_amt_claimlinesuplementalamount",
    explode_columns=[
        "claims",  # -> exploded_array_1
        "exploded_array_1.claim_lines",  # -> exploded_array_2
        "exploded_array_2.claim_line_supplemental_amount"  # -> exploded_array_3
    ],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # First ":"-separated component of the claim line's prcdr_cd.
        "ProductorServiceIDQualifier1": split(col("exploded_array_2.claim_line_details.prcdr_cd"), ":")[0],
        "AmountQualifierCode": "exploded_array_3.amount_qualifier_code",
        "ServiceSupplementalAmount": col("exploded_array_3.amt").cast("decimal(19,2)"),
        "CreditDebitFlagCode": "exploded_array_3.credit_debit_flag_code"
    },
    key_mappings=[
        "835UID",
        "PatientControlNumber",
        "AmountQualifierCode",
        "ProductorServiceIDQualifier1"
    ]
)


In [0]:
# `split` is used below as well as `col`; import both so this cell does not
# depend on another cell having imported `split` first.
from pyspark.sql.functions import col, split

# Claim-line supplemental quantities: three-level explosion
# claims -> claim_lines -> claim_line_supplemental_quantity.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2110_qty_claimlinesuplementalquantity",
    explode_columns=[
        "claims",  # -> exploded_array_1
        "exploded_array_1.claim_lines",  # -> exploded_array_2
        "exploded_array_2.claim_line_supplemental_quantity"  # -> exploded_array_3
    ],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # First ":"-separated component of the claim line's prcdr_cd.
        "ProductorServiceIDQualifier1": split(col("exploded_array_2.claim_line_details.prcdr_cd"), ":")[0],
        "QuantityQualifier": "exploded_array_3.quantity_qualifier_code",
        "ClaimSupplementalInformationQuantity": col("exploded_array_3.qty").cast("float"),
        "CompositeUnitofMeasure": "exploded_array_3.composite_unit_of_measure"
    },
    key_mappings=[
        "835UID",
        "PatientControlNumber",
        "QuantityQualifier",
        "ProductorServiceIDQualifier1"
    ]
)


In [0]:
from pyspark.sql.functions import col, split

# Claim-line adjustments: three-level explosion
# claims (-> exploded_array_1) -> claim_lines (-> exploded_array_2)
# -> claim_line_adjustments (-> exploded_array_3).
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2110_cas_claimlineadjustments",
    explode_columns=[
        "claims",
        "exploded_array_1.claim_lines",
        "exploded_array_2.claim_line_adjustments",
    ],
    key_mappings=[
        "835UID",
        "PatientControlNumber",
        "ProductorServiceIDQualifier1",
        "ClaimAdjustmentGroupCode",
    ],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # First ":"-separated component of the claim line's prcdr_cd.
        "ProductorServiceIDQualifier1": split(col("exploded_array_2.claim_line_details.prcdr_cd"), ":")[0],
        "ClaimAdjustmentGroupCode": "exploded_array_3.adjustment_grp_cd",
        # Slots 1..6 each contribute a reason code (plain column path), an
        # amount and a quantity (both cast to decimal(19,2)), in that order.
        **{
            target: source
            for n in range(1, 7)
            for target, source in (
                (f"AdjustmentReasonCode{n}", f"exploded_array_3.adjustment_reason_cd_{n}"),
                (f"AdjustmentAmount{n}", col(f"exploded_array_3.adjustment_amount_{n}").cast("decimal(19,2)")),
                (f"AdjustmentQuantity{n}", col(f"exploded_array_3.adjustment_quantity_{n}").cast("decimal(19,2)")),
            )
        },
    },
)


In [0]:
from pyspark.sql.functions import col, split

# Claim lines: explode `claims` (-> exploded_array_1), then each claim's
# `claim_lines` (-> exploded_array_2), and load t835_l2110_claimline.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2110_claimline",
    explode_columns=["claims", "exploded_array_1.claim_lines"],
    key_mappings=["835UID", "PatientControlNumber", "ProductorServiceIDQualifier1"],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # prcdr_cd is split on ":"; component i feeds the i-th target below.
        **{
            target: split(col("exploded_array_2.claim_line_details.prcdr_cd"), ":")[position]
            for position, target in enumerate((
                "ProductorServiceIDQualifier1",
                "AdjudicatedProcedureCode",
                "Procedure1Modifier1",
                "Procedure1Modifier2",
                "Procedure1Modifier3",
                "Procedure1Modifier4",
                "Procedure1CodeDescription",
            ))
        },
        "LineItemChargeAmount": col("exploded_array_2.claim_line_details.chrg_amt").cast("decimal(19,2)"),
        "LineItemProviderPayment": col("exploded_array_2.claim_line_details.paid_amt").cast("decimal(19,2)"),
        "NUBCRevenueCode": "exploded_array_2.claim_line_details.rev_cd",
        "UnitsofServicePaidCount": col("exploded_array_2.claim_line_details.units").cast("decimal(19,2)"),
        # original_prcdr_cd is split the same way into the second target set.
        **{
            target: split(col("exploded_array_2.claim_line_details.original_prcdr_cd"), ":")[position]
            for position, target in enumerate((
                "ProductorServiceIDQualifier2",
                "ProcedureCode",
                "Procedure2Modifier1",
                "Procedure2Modifier2",
                "Procedure2Modifier3",
                "Procedure2Modifier4",
                "ProcedureCodeDescription",
            ))
        },
        "OriginalUnitsofServiceCount": col("exploded_array_2.claim_line_details.original_units_of_service_count").cast("decimal(19,2)"),
    },
)


In [0]:
from pyspark.sql.functions import col, split, to_timestamp

# Claim-line dates: three-level explosion
# claims (-> exploded_array_1) -> claim_lines (-> exploded_array_2)
# -> claim_line_dates (-> exploded_array_3).
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2110_dtm_claimlinedates",
    explode_columns=[
        "claims",
        "exploded_array_1.claim_lines",
        "exploded_array_2.claim_line_dates",
    ],
    key_mappings=[
        "835UID",
        "PatientControlNumber",
        "ProductorServiceIDQualifier1",
        "DateTimeQualifier",
    ],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # First ":"-separated component of the claim line's prcdr_cd.
        "ProductorServiceIDQualifier1": split(col("exploded_array_2.claim_line_details.prcdr_cd"), ":").getItem(0),
        # Read from the exploded claim_line_dates element.
        "DateTimeQualifier": "exploded_array_3.date_code",
        "DateValue": to_timestamp(col("exploded_array_3.date"), "yyyyMMdd"),
        "TimeValue": to_timestamp(col("exploded_array_3.time"), "HHmmss"),
    },
)


In [0]:
from pyspark.sql.functions import col, split

# Claim-line remarks: three-level explosion
# claims (-> exploded_array_1) -> claim_lines (-> exploded_array_2)
# -> claim_line_remarks (-> exploded_array_3).
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2110_lq_claimlineremarks",
    explode_columns=[
        "claims",
        "exploded_array_1.claim_lines",
        "exploded_array_2.claim_line_remarks",
    ],
    key_mappings=[
        "835UID",
        "PatientControlNumber",
        "CodeListQualifierCode",
        "ProductorServiceIDQualifier1",
    ],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # First ":"-separated component of the claim line's prcdr_cd.
        "ProductorServiceIDQualifier1": split(col("exploded_array_2.claim_line_details.prcdr_cd"), ":").getItem(0),
        # Read from the exploded claim_line_remarks element.
        "CodeListQualifierCode": "exploded_array_3.qualifier_cd",
        "RemarkCode": "exploded_array_3.remark_cd",
    },
)


In [0]:
from pyspark.sql.functions import col, split

# Claim-line related identifications: three-level explosion
# claims (-> exploded_array_1) -> claim_lines (-> exploded_array_2)
# -> claim_line_related_identifications (-> exploded_array_3).
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_l2110_ref_claimlinerelatedidentifications",
    explode_columns=[
        "claims",
        "exploded_array_1.claim_lines",
        "exploded_array_2.claim_line_related_identifications",
    ],
    key_mappings=[
        "835UID",
        "PatientControlNumber",
        "ReferenceIDQualifier",
        "ProductorServiceIDQualifier1",
    ],
    field_mappings={
        "835UID": "835UID",
        "PatientControlNumber": "exploded_array_1.clp.patient_control_number",
        # First ":"-separated component of the claim line's prcdr_cd.
        "ProductorServiceIDQualifier1": split(col("exploded_array_2.claim_line_details.prcdr_cd"), ":").getItem(0),
        # Read from the exploded identification element.
        "ReferenceIDQualifier": "exploded_array_3.id_qualifier_code",
        "ReferenceIdentification": "exploded_array_3.id",
        "Description": "exploded_array_3.description",
    },
)


In [0]:
from pyspark.sql.functions import col, to_timestamp

# Header-level load into t835_main: no array explosion, so each mapping reads
# directly from the source row.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_main",
    explode_columns=[],  # No explosion needed
    field_mappings={
        "835UID": "835UID",
        # NOTE(review): the backtick-quoted names below address a single column
        # whose name contains a dot (e.g. "EDI.usage_indicator"), unlike the
        # unquoted "payment.bpr.*" struct paths - confirm against the source schema.
        "AuthorizationInformationQualifier": "`EDI.authorization_information_qualifier`",
        "AuthorizationInformation": "`EDI.authorization_information`",
        "SecurityInformationQualifier": "`EDI.security_information_qualifier`",
        "SecurityInformation": "`EDI.security_information`",
        "InterchangeIDQualifier1": "`EDI.interchange_id_qualifier_1`",
        "InterchangeSenderID": "`EDI.interchange_sender_id`",
        "InterchangeIDQualifier2": "`EDI.interchange_id_qualifier_2`",
        "InterchangeReceiverID": "`EDI.interchange_receiver_id`",
        "InterchangeDate": "`EDI.interchange_date`",
        "InterchangeTime": "`EDI.interchange_time`",
        "RepetitionSeparator": "`EDI.repetition_separator`",
        "InterchangeControlVersionNumber": "`EDI.interchange_control_version_number`",
        "InterchangeControlNumber": "`EDI.interchange_control_number`",
        "AcknowledgementRequested": "`EDI.acknowledgement_requested`",
        "UsageIndicator": "`EDI.usage_indicator`",
        "ComponentElementSeparator": "`EDI.component_element_separator`",
        # "FunctionalGroupHeader": "FunctionalGroupHeader",
        "FunctionalIdentifierCode": "`FunctionalGroup.functional_identifier_code`",
        "ApplicationSenderCode": "`FunctionalGroup.application_sender_code`",
        "ApplicationReceiverCode": "`FunctionalGroup.application_receiver_code`",
        "CreationDate": to_timestamp(col("`FunctionalGroup.creation_date`"), "yyyyMMdd"),
        # Uppercase HH (hour-of-day, 0-23) as in the other time mappings in this
        # notebook; lowercase hh is the 12-hour clock-hour and cannot parse
        # values such as "1430" or "0005".
        "CreationDateTime": to_timestamp(col("`FunctionalGroup.creation_date_time_datetime`"), "HHmm"),
        "GroupControlNumber": "`FunctionalGroup.group_control_number`",
        "ResponsibleAgencyCode": "`FunctionalGroup.responsible_agency_code`",
        "VersionReleaseIndustryIDcode": "`FunctionalGroup.version_release_industry_id_code`",
        "TransactionSetIdentifierCode": "`Transaction.transaction_set_identifier_code`",
        "TransactionSetControlNumber": "`Transaction.transaction_set_control_number`",
        "ImplementationConventionReference": "`Transaction.implementation_convention_reference`",
        "TransactionHandlingCode": "payment.bpr.transaction_handling_code",
        "TotalActualProviderPaymentAmt": col("payment.bpr.total_actual_provider_payment_amt").cast("decimal(19,2)"),
        "CreditorDebitFlagCode": "payment.bpr.creditor_debit_flag_code",
        "PaymentMethodCode": "payment.bpr.payment_method_code",
        "PaymentFormatCode": "payment.bpr.payment_format_code",
        "SenderDFIIDNumberQualifier": "payment.bpr.sender_dfiid_number_qualifier",
        "SenderDFIIdentifier": "payment.bpr.sender_dfi_identifier",
        "SenderAccountNumberQualifier": "payment.bpr.sender_account_number_qualifier",
        "SenderBankAcctNumber": "payment.bpr.sender_bank_acct_number",
        "PayerIdentifier": "payment.bpr.payer_identifier",
        "PayerOriginatingCoSupplementalCode": "payment.bpr.payer_originating_co_supplemental_code",
        "ReceiverDFIIDNumberQualifier": "payment.bpr.receiver_dfiid_number_qualifier",
        "ReceiverorProviderBankIDNumber": "payment.bpr.receiver_or_provider_bank_id_number",
        "ReceiverAcctNumberQualifier": "payment.bpr.receiver_acct_number_qualifier",
        "ReceiverorProviderAccountNumber": "payment.bpr.receiver_or_provider_account_number",
        "CheckIssueorEFTEffectiveDate": to_timestamp(col("payment.bpr.check_issue_or_eft_effective_date"), "yyyyMMdd"),
        "BusinessFunctionCode": "payment.bpr.business_function_code",
        "TraceTypeCode": "payment.trn.trace_type_code",
        "CheckorEFTTraceNumber": "payment.trn.check_or_eft_trace_number",
        "TracePayerIdentifier": "payment.trn.trace_payer_identifier",
        "TracePayerOriginatingCoSupplementalCode": "payment.trn.trace_payer_originating_co_supplemental_code"
    },
    key_mappings=[
        "835UID",
    ]
)


In [0]:
from pyspark.sql.functions import col, to_timestamp

# Header-level dates into t835_main_dtm_dates: nothing is exploded, so the
# mappings read `payment.dtm` straight from the source row.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_main_dtm_dates",
    explode_columns=[],
    key_mappings=["835UID", "DateTimeQualifier"],
    field_mappings={
        "835UID": "835UID",
        "DateTimeQualifier": "payment.dtm.date_code",
        "ProductionDate": to_timestamp(col("payment.dtm.date"), "yyyyMMdd"),
        # 24-hour clock, hours and minutes only.
        "ProductionDateTime": to_timestamp(col("payment.dtm.time"), "HHmm"),
    },
)


In [0]:
from pyspark.sql.functions import col, to_timestamp

# Provider-level adjustments: explode `provider_adjustments`
# (-> exploded_array_1) and load t835_main_plb_provider.
explode_and_load_rows_safe(
    source_table=f"{v_environment}_analytics.db_000000_healthcareformats.remittance",
    target_table=f"{v_environment}_analytics.db_000000_healthcareformats.t835_main_plb_provider",
    explode_columns=["provider_adjustments"],
    key_mappings=["835UID", "ProviderIdentifier"],
    field_mappings={
        "835UID": "835UID",
        "ProviderIdentifier": "exploded_array_1.provider_identifier",
        "FiscalPeriodDate": to_timestamp(col("exploded_array_1.fiscal_period_date"), "yyyyMMdd"),
        # Slots 1..6 each contribute an identifier, a reason code and an amount
        # (cast to decimal(19,2)), in that order.
        # NOTE(review): AdjustmentIdentifierN reads provider_adjustment_reason_cd_N
        # and AdjustmentReasonCodeN reads provider_adjustment_id_N. The names look
        # crossed; confirm against the target schema before changing anything.
        **{
            target: source
            for n in range(1, 7)
            for target, source in (
                (f"AdjustmentIdentifier{n}", f"exploded_array_1.provider_adjustment_reason_cd_{n}"),
                (f"AdjustmentReasonCode{n}", f"exploded_array_1.provider_adjustment_id_{n}"),
                (f"ProviderAdjustmentAmount{n}", col(f"exploded_array_1.provider_adjustment_amt_{n}").cast("decimal(19,2)")),
            )
        },
    },
)
