In [0]:
# Declare the three text widgets that parameterise this notebook.
# Each entry is (widget name, label shown in the UI); every default is the empty string.
for widget_name, widget_label in (
    ("p_client_id", "Client ID"),
    ("p_consolidated_schema_name", "Consolidated Schema Name"),
    ("p_environment_name", "Environment Name"),
):
    dbutils.widgets.text(widget_name, "", widget_label)

In [0]:
# Read the widget values (in declaration order) and trim surrounding whitespace.
v_clientid, v_consolidated_schema_name, v_environment_name = (
    dbutils.widgets.get(widget_name).strip()
    for widget_name in (
        "p_client_id",
        "p_consolidated_schema_name",
        "p_environment_name",
    )
)

In [0]:
%run "../../common/ParserCommonFunctions"

In [0]:
# Look up the environment-specific locations through the shared helper.
v_locations_dict = get_locations_by_env(v_environment_name)

# Base locations and the Unity Catalog name for this environment.
v_file_location, v_archive_location, v_unity_catalog = (
    v_locations_dict[location_key]
    for location_key in ('source', 'archive', 'unity_catalog')
)

# Client-specific folders: landing area, parsed archive and quarantine archive for 835 files.
# Plain concatenation is used, so the base locations are presumably expected to
# end with a path separator -- TODO confirm against get_locations_by_env.
v_location_client = v_file_location + v_clientid
v_file_location_client = v_location_client + "/835/"
v_archive_location_client = "".join((v_archive_location, v_clientid, "/parsed/835/"))
v_quarantined_location_client = "".join((v_archive_location, v_clientid, "/quarantined/835/"))

# Schema and catalog names; both catalog variables point at the same Unity Catalog.
v_client_schema = f"db_{v_clientid}"
v_catalog = v_consolidated_catalog = v_unity_catalog

# Storage location of the consolidated schema: the schema name with every "db_" occurrence removed.
v_healthcare_file_location = v_file_location + v_consolidated_schema_name.replace("db_", "")

In [0]:

# List the client's 835 landing folder and stop the notebook early when it is empty.
v_folder_contents_list = dbutils.fs.ls(v_file_location_client)

if not v_folder_contents_list:
    dbutils.notebook.exit("No files to process")

In [0]:
%pip install ../x12-edi-parser

In [0]:
import json
import pandas as pd

from pyspark.sql.functions import (
    pandas_udf,
    from_json,
    col,
    schema_of_json,
    to_json,
    lit,
    current_timestamp,
    expr,
    split,
    element_at,
    concat,
    coalesce
)

from pyspark.sql.types import StructType, StructField, StringType, ArrayType

from ember import *
from ember.hls.healthcare import HealthcareManager as hm

from pyspark.sql.utils import AnalysisException


In [0]:
# Make sure the consolidated schema exists, backed by its managed storage location.
# The statement is assembled from two clauses joined by a single space.
create_stmt = " ".join([
    f"CREATE SCHEMA IF NOT EXISTS {v_consolidated_catalog}.{v_consolidated_schema_name}",
    f"MANAGED LOCATION '{v_healthcare_file_location}'",
])
print("Creating schema: " + create_stmt)
spark.sql(create_stmt)


In [0]:
# Shape of the rows written to the quarantine table: the offending file and the reason.
quarantine_schema = StructType([
    StructField("filename", StringType(), True),
    StructField("error", StringType(), True)
])

# Paths of every entry in the client's 835 landing folder, taken from the
# `path` attribute of the dbutils.fs.ls entries.
v_files_list = [f.path for f in dbutils.fs.ls(v_file_location_client)]
v_good_files_list = []
v_quarantine_files_list = []

# Probe each file; anything that cannot be opened is recorded for quarantine.
for f in v_files_list:
    try:
        # `f` is already the path reported by dbutils.fs.ls, so it is passed as-is.
        # Prefixing it with v_file_location_client again (the previous behaviour)
        # pointed at a location that does not exist, so every file was quarantined.
        # NOTE(review): no Spark action is triggered here, so this presumably only
        # validates that the path resolves -- confirm whether a real read is wanted.
        spark.read.text(f, wholetext=True)
        v_good_files_list.append(f)
    except Exception as e:
        v_quarantine_files_list.append((f, f"[STORAGE_READ_ERROR] {str(e)}"))

# With an explicit schema, createDataFrame handles the empty list as well,
# so no separate branch is needed for "nothing to quarantine".
v_quarantine_files_df = spark.createDataFrame(v_quarantine_files_list, schema=quarantine_schema)

v_quarantine_files_audit_df = add_audit_fields(v_quarantine_files_df, lit(v_clientid), lit('tbd'))

# Fully-qualified name of the quarantine table in the consolidated schema.
v_quarantine_target_table_location = f"{v_consolidated_catalog}.{v_consolidated_schema_name}.quarantine"
append_or_create_table(v_quarantine_files_audit_df, v_quarantine_target_table_location)

In [0]:
# Bind the result names up front so later cells can reference them even when
# no file survived the storage check (previously they were undefined in that case).
v_parsing_failures_df = None
v_parsing_success_df = None

if v_good_files_list:
    # Read each readable file as a single row and keep its source path as `filename`.
    v_good_files_df = spark.read.text(v_good_files_list, wholetext=True)
    v_good_files_df = v_good_files_df.withColumn("filename", v_good_files_df["_metadata.file_path"])

    # Run the EDI parser UDF and emit one row per element of its result.
    v_good_files_with_results_df = (
        v_good_files_df
        .withColumn("parsed", parse_edi_udf(col("value"), col("filename")))
        .withColumn("json_str", expr("explode(parsed)"))
    )

    # A row counts as quarantined when the stringified `parsed` value carries the marker.
    # NOTE(review): the check looks at `parsed` (the value before explode), not at
    # `json_str`, so all rows from one file share the same verdict -- confirm intended.
    v_is_quarantined = col("parsed").cast("string").contains('"quarantine": true')

    v_parsing_failures_df = v_good_files_with_results_df.filter(v_is_quarantined)
    v_parsing_success_df = v_good_files_with_results_df.filter(~v_is_quarantined)

In [0]:
# Record parser-level failures in the quarantine table.
# The guard mirrors the cell that builds v_parsing_failures_df: that frame only
# exists when v_good_files_list is non-empty. Testing the DataFrame object itself
# says nothing about whether it has rows, and raised NameError when the parsing
# cell had been skipped.
if v_good_files_list:
    # Keep the file name and expose the parser output as the `error` column.
    v_parsing_failures_df = v_parsing_failures_df.select(col('filename'), col('json_str').alias('error'))
    v_parsing_failures_with_audit_df = add_audit_fields(v_parsing_failures_df, lit(v_clientid), lit('tbd'))

    append_or_create_table(v_parsing_failures_with_audit_df, v_quarantine_target_table_location)

In [0]:
# remittance_ref_schema = StructType([
#     # ---------------- EDI ----------------
#     StructField('EDI.control_number', StringType(), True),
#     StructField('EDI.date', StringType(), True),
#     StructField('EDI.recipient_qualifier_id', StringType(), True),
#     StructField('EDI.sender_qualifier_id', StringType(), True),
#     StructField('EDI.standard_version', StringType(), True),
#     StructField('EDI.time', StringType(), True),

#     # ---------------- FunctionalGroup ----------------
#     StructField('FunctionalGroup.control_number', StringType(), True),
#     StructField('FunctionalGroup.date', StringType(), True),
#     StructField('FunctionalGroup.receiver', StringType(), True),
#     StructField('FunctionalGroup.sender', StringType(), True),
#     StructField('FunctionalGroup.standard_version', StringType(), True),
#     StructField('FunctionalGroup.time', StringType(), True),
#     StructField('FunctionalGroup.transaction_type', StringType(), True),

#     # ---------------- Transaction ----------------
#     StructField('Transaction.transaction_type', StringType(), True),

#     # ---------------- Header Info ----------------
#     StructField('header_info', StructType([
#         StructField('assigned_num', StringType(), True),
#         StructField('ts3',
#             StructType([
#                 StructField('provider_identifier', StringType(), True),
#                 StructField('facility_code_value', StringType(), True),
#                 StructField('fiscal_period_date', StringType(), True),
#                 StructField('total_claim_count', StringType(), True),
#                 StructField('total_claim_change_amount', StringType(), True),
#                 StructField('total_covered_charge_amount', StringType(), True),
#                 StructField('total_noncovered_charge_amount', StringType(), True),
#                 StructField('total_denied_charge_amount', StringType(), True),
#                 StructField('total_provider_amount', StringType(), True),
#                 StructField('total_interest_amount', StringType(), True),
#                 StructField('total_contractual_adjustment_amount', StringType(), True),
#                 StructField('total_gramm_rudman_reduction_amount', StringType(), True),
#                 StructField('total_msp_payer_amount', StringType(), True),
#                 StructField('total_blood_deductible_amount', StringType(), True),
#                 StructField('total_non_lab_charge_amount', StringType(), True),
#                 StructField('total_coinsurance_amount', StringType(), True),
#                 StructField('total_hcpcs_reported_charge_amount', StringType(), True),
#                 StructField('total_hcpcs_payable_amount', StringType(), True),
#                 StructField('total_deductible_amount', StringType(), True),
#                 StructField('total_professional_component_amount', StringType(), True),
#                 StructField('total_msp_patient_liability_met_amount', StringType(), True),
#                 StructField('total_patient_reimbursement_amount', StringType(), True),
#                 StructField('total_pip_claim_count', StringType(), True),
#                 StructField('total_pip_adjustment_amount', StringType(), True),
#             ]), True),
#         StructField('ts2',
#                 StructType([                                
#                     StructField('total_drg_amount', StringType(), True),
#                     StructField('total_federal_specific_amount', StringType(), True),
#                     StructField('total_hospital_specific_amount', StringType(), True),
#                     StructField('total_disproportionate_amount', StringType(), True),
#                     StructField('total_capital_amount', StringType(), True),
#                     StructField('total_indirect_medical_education_amount', StringType(), True),
#                     StructField('total_outlier_day_count', StringType(), True),
#                     StructField('total_day_outlier_amount', StringType(), True),
#                     StructField('total_cost_outlier_amount', StringType(), True),
#                     StructField('average_drg_length_of_stay', StringType(), True),
#                     StructField('total_discharge_count', StringType(), True),
#                     StructField('total_cost_report_day_count', StringType(), True),
#                     StructField('total_covered_day_count', StringType(), True),
#                     StructField('total_noncovered_day_count', StringType(), True),
#                     StructField('total_msp_pass_through_amount', StringType(), True),
#                     StructField('average_drg_weight', StringType(), True),
#                     StructField('total_pps_capital_fsp_drg_amount', StringType(), True),
#                     StructField('total_psp_capital_hsp_drg_amount', StringType(), True),
#                     StructField('total_pps_dsh_drg_amount', StringType(), True)
#                 ]), True),
#     ]), True),


#     # ---------------- Claims ----------------
#     StructField('claims', ArrayType(
#         StructType([
#             StructField('clp', StructType([
#                 StructField('claim_id', StringType(), True),
#                 StructField('claim_status_cd', StringType(), True),
#                 StructField('claim_chrg_amt', StringType(), True),
#                 StructField('claim_pay_amt', StringType(), True),
#                 StructField('patient_pay_amt', StringType(), True),
#                 StructField('claim_filing_cd', StringType(), True),
#                 StructField('payer_claim_id', StringType(), True),
#                 StructField('type_of_bill_cd', StringType(), True),
#                 StructField('claim_freq_cd', StringType(), True),
#                 StructField('patient_status_code', StringType(), True),
#                 StructField('drg_cd', StringType(), True),
#                 StructField('drg_weight', StringType(), True),
#                 StructField('discharge_fraction', StringType(), True),
#                 StructField('yes_no_condition_or_response_code', StringType(), True),
#                 ]), True),
            
#             StructField('first_nm1', StructType([
#                 StructField('patient_entity_id_cd', StringType(), True),
#                 StructField('entity_type_qualifier', StringType(), True),
#                 StructField('patient_last_name_or_organization', StringType(), True),
#                 StructField('patient_first_name', StringType(), True),
#                 StructField('patient_middle_name', StringType(), True),
#                 StructField('patient_name_prefix', StringType(), True),
#                 StructField('patient_name_suffix', StringType(), True),
#                 StructField('id_code_qualifier', StringType(), True),
#                 StructField('patient_id', StringType(), True),
#                 StructField('patient_entity_relationship_code', StringType(), True),
#                 ]), True),

#             # ---- Person / Org ----
#             StructField('claim_contacts', ArrayType(
#                 StructType([
#                     StructField('entity_last_or_organization_name', StringType(), True),
#                     StructField('entity_first', StringType(), True),
#                     StructField('entity_middle_name', StringType(), True),
#                     StructField('entity_name_prefix', StringType(), True),
#                     StructField('entity_name_suffix', StringType(), True),
#                     StructField('entity_id_cd', StringType(), True),
#                     StructField('entity_relationship_code', StringType(), True),
#                     StructField('entity_type_qualifier', StringType(), True),
#                     StructField('id_cd', StringType(), True),
#                     StructField('id_cd_qualifier', StringType(), True)
#                 ]), True
#             ), True),

#             StructField('claim_dates', ArrayType(
#                 StructType([
#                     StructField('date_cd', StringType(), True),
#                     StructField('date', StringType(), True),
#                     StructField('time', StringType(), True),
#                 ]), True
#             ), True),

#             # ---- MIA ----
#             StructField('mia', StructType([
#                 StructField('covered_days_or_visits_count', StringType(), True),
#                 StructField('pps_operation_outlier_amount', StringType(), True),
#                 StructField('lifetime_psychiatric_days_count', StringType(), True),
#                 StructField('claim_drg_amount', StringType(), True),
#                 StructField('claim_payment_remark_code', StringType(), True),
#                 StructField('claim_dsh_amount', StringType(), True),
#                 StructField('claim_msp_pass_thru_amount', StringType(), True),
#                 StructField('claim_pps_capital_amount', StringType(), True),
#                 StructField('pps_capital_fsp_drg_amount', StringType(), True),
#                 StructField('pps_capital_hsp_drg_amount', StringType(), True),
#                 StructField('pps_capital_dsh_drg_amount', StringType(), True),
#                 StructField('old_capital_amount', StringType(), True),
#                 StructField('pps_capital_ime_amount', StringType(), True),
#                 StructField('pps_oper_hsp_spec_drg_amount', StringType(), True),
#                 StructField('cost_report_day_count', StringType(), True),
#                 StructField('pps_oper_fsp_spec_drg_amount', StringType(), True),
#                 StructField('claim_pps_outlier_amount', StringType(), True),
#                 StructField('claim_indirect_teaching', StringType(), True),
#                 StructField('non_pay_prof_comp_amount', StringType(), True),
#                 StructField('inpatient_claim_payment_remark_code_1', StringType(), True),
#                 StructField('inpatient_claim_payment_remark_code_2', StringType(), True),
#                 StructField('inpatient_claim_payment_remark_code_3', StringType(), True),
#                 StructField('inpatient_claim_payment_remark_code_4', StringType(), True),
#                 StructField('pps_capital_exception_amount', StringType(), True),
#             ]), True),
#             # ---- MOA ----
#             StructField('moa', StructType([
#                 StructField('reimbursement_rate', StringType(), True),
#                 StructField('claim_hcpcs_payable_amount', StringType(), True),
#                 StructField('outpatient_claim_payment_remark_code_1', StringType(), True),
#                 StructField('outpatient_claim_payment_remark_code_2', StringType(), True),
#                 StructField('outpatient_claim_payment_remark_code_3', StringType(), True),
#                 StructField('outpatient_claim_payment_remark_code_4', StringType(), True),
#                 StructField('outpatient_claim_payment_remark_code_5', StringType(), True),
#                 StructField('claim_esrd_payment_amount', StringType(), True),
#                 StructField('non_payable_professional_comp_amount', StringType(), True),
#             ]), True),



#             # ---- Claim Adjustments ----
#             StructField('claim_adjustments', ArrayType(
#                 StructType([
#                     StructField('adjustment_grp_cd', StringType(), True),
#                     StructField('adjustment_reason_cd_1', StringType(), True),
#                     StructField('adjustment_amount_1', StringType(), True),
#                     StructField('adjustment_quantity_1', StringType(), True),
#                     StructField('adjustment_reason_cd_2', StringType(), True),
#                     StructField('adjustment_amount_2', StringType(), True),
#                     StructField('adjustment_quantity_2', StringType(), True),
#                     StructField('adjustment_reason_cd_3', StringType(), True),
#                     StructField('adjustment_amount_3', StringType(), True),
#                     StructField('adjustment_quantity_3', StringType(), True),
#                     StructField('adjustment_reason_cd_4', StringType(), True),
#                     StructField('adjustment_amount_4', StringType(), True),
#                     StructField('adjustment_quantity_4', StringType(), True),
#                     StructField('adjustment_reason_cd_5', StringType(), True),
#                     StructField('adjustment_amount_5', StringType(), True),
#                     StructField('adjustment_quantity_5', StringType(), True),
#                     StructField('adjustment_reason_cd_6', StringType(), True),
#                     StructField('adjustment_amount_6', StringType(), True),
#                     StructField('adjustment_quantity_6', StringType(), True)
#                 ]), True
#             ), True),

#             StructField('claim_related_identifications', ArrayType(
#                 StructType([
#                     StructField('id', StringType(), True),
#                     StructField('id_code_qualifier', StringType(), True)
#                 ]), True
#             ), True),
#             StructField('claim_supplemental_amount', ArrayType(
#                 StructType([
#                     StructField('amount_qualifier_code', StringType(), True),
#                     StructField('amt', StringType(), True),
#                     StructField('credit_debit_flag_code', StringType(), True)
#                 ]), True
#             ), True),

#             StructField('claim_supplemental_quantity', ArrayType(
#                 StructType([
#                     StructField('quantity_qualifier_code', StringType(), True),
#                     StructField('qty', StringType(), True),
#                     StructField('composite_unit_of_measure', StringType(), True)
#                 ]), True
#             ), True),

#             # ---- Claim Lines ----
#             StructField('claim_lines', ArrayType(
#                 StructType([
#                     StructField('chrg_amt', StringType(), True),
#                     StructField('line_refs', ArrayType(
#                         StructType([
#                             StructField('id', StringType(), True),
#                             StructField('id_code_qualifier', StringType(), True)
#                         ]), True
#                     ), True),
#                     StructField('original_prcdr_cd', StringType(), True),
#                     StructField('claim_line_supplemental_amount', ArrayType(
#                         StructType([
#                             StructField('amt', StringType(), True),
#                             StructField('credit_debit_flag_code', StringType(), True),
#                             StructField('amt_qualifier_cd', StringType(), True),
#                         ]), True
#                     ), True),
#                     StructField('claim_line_supplemental_quantity', ArrayType(
#                         StructType([
#                             StructField('qty', StringType(), True),
#                             StructField('composite_unit_of_measure', StringType(), True),
#                             StructField('quantity_qualifier', StringType(), True),
#                         ]), True
#                     ), True),
#                     StructField('paid_amt', StringType(), True),
#                     StructField('prcdr_cd', StringType(), True),
#                     StructField('claim_line_remarks', ArrayType(
#                         StructType([
#                             StructField('qualifier_cd', StringType(), True),
#                             StructField('remark_cd', StringType(), True)
#                         ]), True
#                     ), True),
#                     StructField('rev_cd', StringType(), True),
#                     StructField('service_adjustments', ArrayType(
#                         StructType([
#                             StructField('adjustment_grp_cd', StringType(), True),
#                             StructField('adjustment_reason_cd_1', StringType(), True),
#                             StructField('adjustment_amount_1', StringType(), True),
#                             StructField('adjustment_quantity_1', StringType(), True),
#                             StructField('adjustment_reason_cd_2', StringType(), True),
#                             StructField('adjustment_amount_2', StringType(), True),
#                             StructField('adjustment_quantity_2', StringType(), True),
#                             StructField('adjustment_reason_cd_3', StringType(), True),
#                             StructField('adjustment_amount_3', StringType(), True),
#                             StructField('adjustment_quantity_3', StringType(), True),
#                             StructField('adjustment_reason_cd_4', StringType(), True),
#                             StructField('adjustment_amount_4', StringType(), True),
#                             StructField('adjustment_quantity_4', StringType(), True),
#                             StructField('adjustment_reason_cd_5', StringType(), True),
#                             StructField('adjustment_amount_5', StringType(), True),
#                             StructField('adjustment_quantity_5', StringType(), True),
#                             StructField('adjustment_reason_cd_6', StringType(), True),
#                             StructField('adjustment_amount_6', StringType(), True),
#                             StructField('adjustment_quantity_6', StringType(), True)
#                         ]), True
#                     ), True),
#                     StructField('service_date', StringType(), True),
#                     StructField('service_time', StringType(), True),
#                     StructField('service_date_qualifier_cd', StringType(), True),
#                     StructField('units', StringType(), True)
#                 ]), True
#             ), True),
            

                        



            
            

#         ]), True
#     ), True),

#     # ---------------- File Info ----------------
#     StructField('filename', StringType(), True),


#     # ---------------- Payee ----------------
#     StructField('payee', StructType([
#         StructField('payee_entity_id_cd', StringType(), True),
#         StructField('payee_id_cd', StringType(), True),
#         StructField('payee_entity_relationship_code', StringType(), True),
#         StructField('payee_name', StringType(), True),
#         StructField('payee_npi', StringType(), True),
#         StructField('payee_address_line_1', StringType(), True),
#         StructField('payee_address_line_2', StringType(), True),
#         StructField('payee_city_name', StringType(), True),
#         StructField('payee_state_code', StringType(), True),
#         StructField('payee_zip', StringType(), True),
#         StructField('payee_country_code', StringType(), True),
#         StructField('payee_location_qualifier', StringType(), True),
#         StructField('payee_country_subdivision_code', StringType(), True),
#         StructField('payee_additional_identification', ArrayType(
#             StructType([
#                 StructField('payee_primary_id', StringType(), True),
#                 StructField('payee_secondary_id', StringType(), True),
#                 StructField('payee_description', StringType(), True),
#             ]), True
#         ), True),
#         StructField('delivery_report_transmission_code', StringType(), True),
#         StructField('delivery_name', StringType(), True),
#         StructField('delivery_communication_number', StringType(), True),
#         StructField('delivery_reference_identifier', StringType(), True)
#     ]), True),

#     # ---------------- Payer ----------------
#     StructField('payer', StructType([
#         StructField('entity_id_cd', StringType(), True),
#         StructField('payer_city', StringType(), True),
#         StructField('payer_contact_info', ArrayType(
#             StructType([
#                 StructField('payer_contact_function_cd', StringType(), True),
#                 StructField('payer_contact_name', StringType(), True),
#                 StructField('payer_communication_number_qualifier1', StringType(), True),
#                 StructField('payer_contact_communication1', StringType(), True),
#                 StructField('payer_communication_number_qualifier2', StringType(), True),
#                 StructField('payer_contact_communication2', StringType(), True),
#                 StructField('payer_communication_number_qualifier3', StringType(), True),
#                 StructField('payer_contact_communication3', StringType(), True),
#                 StructField('payer_contact_inquiry_reference', StringType(), True)
#             ]), True
#         ), True),
#         StructField('payer_name', StringType(), True),
#         StructField('payer_id_code_qualifier', StringType(), True),
#         StructField('payer_identifier', StringType(), True),
#         StructField('payer_entity_relationship_code', StringType(), True),
#         StructField('payer_additional_identification', ArrayType(
#             StructType([
#                 StructField('payer_primary_id', StringType(), True),
#                 StructField('payer_secondary_id', StringType(), True),
#                 StructField('payer_description', StringType(), True),
#             ]), True
#         ), True),
#         StructField('payer_state', StringType(), True),
#         StructField('payer_street', StringType(), True),
#         StructField('payer_address_line_2', StringType(), True),
#         StructField('payer_zip', StringType(), True),
#         StructField('payer_country_code', StringType(), True),
#         StructField('payer_location_qualifier', StringType(), True),
#         StructField('payer_country_subdivision_code', StringType(), True)
#     ]), True),

#     # ---------------- Payment ----------------
#     StructField('payment', StructType([
#         StructField('credit_debit_flag', StringType(), True),
#         StructField('monetary_amt', StringType(), True),
#         StructField('payment_date', StringType(), True),
#         StructField('payment_method_cd', StringType(), True),
#         StructField('trace_origin_company_id', StringType(), True),
#         StructField('trace_reference_id', StringType(), True),
#         StructField('trace_type_cd', StringType(), True),
#         StructField('transaction_handling_cd', StringType(), True)
#     ]), True),

#     # ---------------- Misc ----------------
#     StructField('provider_adjustments', ArrayType(
#         StructType([
#             StructField('provider_identifier', StringType(), True),
#             StructField('fiscal_period_date', StringType(), True),
#             StructField('provider_adjustment_reason_cd_1', StringType(), True),
#             StructField('provider_adjustment_id_1', StringType(), True),
            
#             StructField('provider_adjustment_amt_1', StringType(), True),
#             StructField('provider_adjustment_reason_cd_2', StringType(), True),
#             StructField('provider_adjustment_id_2', StringType(), True),

#             StructField('provider_adjustment_amt_2', StringType(), True),
#             StructField('provider_adjustment_reason_cd_3', StringType(), True),
#             StructField('provider_adjustment_id_3', StringType(), True),

#             StructField('provider_adjustment_amt_3', StringType(), True),
#             StructField('provider_adjustment_reason_cd_4', StringType(), True),
#             StructField('provider_adjustment_id_4', StringType(), True),

#             StructField('provider_adjustment_amt_4', StringType(), True),
#             StructField('provider_adjustment_reason_cd_5', StringType(), True),
#             StructField('provider_adjustment_id_5', StringType(), True),

#             StructField('provider_adjustment_amt_5', StringType(), True),
#             StructField('provider_adjustment_reason_cd_6', StringType(), True),
#             StructField('provider_adjustment_id_6', StringType(), True),

#             StructField('provider_adjustment_amt_6', StringType(), True),
#         ]), True
#     ), True)
# ])

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, ArrayType

# ---------------- EDI ----------------
# Nullable string columns whose names carry the 'EDI.' prefix. Kept as a plain
# Python list (not a StructType) because it is concatenated with the other
# top-level field lists when remittance_ref_schema is assembled.
# Field names currently disabled in this list: EDI.control_number, EDI.date,
# EDI.recipient_qualifier_id, EDI.sender_qualifier_id, EDI.standard_version,
# EDI.time.
edi_schema = [
    StructField(edi_field_name, StringType(), True)
    for edi_field_name in (
        'EDI.authorization_information_qualifier',
        'EDI.authorization_information',
        'EDI.security_information_qualifier',
        'EDI.security_information',
        'EDI.interchange_id_qualifier_1',
        'EDI.interchange_sender_id',
        'EDI.interchange_id_qualifier_2',
        'EDI.interchange_receiver_id',
        'EDI.interchange_date',
        'EDI.interchange_time',
        'EDI.repetition_separator',
        'EDI.interchange_control_version_number',
        'EDI.interchange_control_number',
        'EDI.acknowledgement_requested',
        'EDI.usage_indicator',
        'EDI.component_element_separator',
    )
]

# ---------------- Functional Group ----------------
# Nullable string columns prefixed with 'FunctionalGroup.'; a plain list so it
# can be concatenated into the final top-level schema.
# Field names currently disabled in this list: FunctionalGroup.control_number,
# FunctionalGroup.date, FunctionalGroup.receiver, FunctionalGroup.sender,
# FunctionalGroup.standard_version, FunctionalGroup.time,
# FunctionalGroup.transaction_type.
functional_group_schema = [
    StructField(group_field_name, StringType(), True)
    for group_field_name in (
        'FunctionalGroup.functional_identifier_code',
        'FunctionalGroup.application_sender_code',
        'FunctionalGroup.application_receiver_code',
        'FunctionalGroup.creation_date',
        'FunctionalGroup.creation_date_time_datetime',
        'FunctionalGroup.group_control_number',
        'FunctionalGroup.responsible_agency_code',
        'FunctionalGroup.version_release_industry_id_code',
    )
]

# ---------------- Transaction ----------------
# Nullable string columns prefixed with 'Transaction.'; a plain list so it can
# be concatenated into the final top-level schema.
transaction_schema = [
    StructField(txn_field_name, StringType(), True)
    for txn_field_name in (
        'Transaction.transaction_set_identifier_code',
        'Transaction.transaction_set_control_number',
        'Transaction.implementation_convention_reference',
    )
]

# ---------------- Header Info ----------------
# Struct used for the 'ts3' field of header_info_schema. Every member is a
# nullable string; no numeric typing is applied at this stage.
ts3_schema = StructType([
    StructField(ts3_field_name, StringType(), True)
    for ts3_field_name in (
        'provider_identifier',
        'facility_code_value',
        'fiscal_period_date',
        'total_claim_count',
        'total_claim_change_amount',
        'total_covered_charge_amount',
        'total_noncovered_charge_amount',
        'total_denied_charge_amount',
        'total_provider_amount',
        'total_interest_amount',
        'total_contractual_adjustment_amount',
        'total_gramm_rudman_reduction_amount',
        'total_msp_payer_amount',
        'total_blood_deductible_amount',
        'total_non_lab_charge_amount',
        'total_coinsurance_amount',
        'total_hcpcs_reported_charge_amount',
        'total_hcpcs_payable_amount',
        'total_deductible_amount',
        'total_professional_component_amount',
        'total_msp_patient_liability_met_amount',
        'total_patient_reimbursement_amount',
        'total_pip_claim_count',
        'total_pip_adjustment_amount',
    )
])

# Struct used for the 'ts2' field of header_info_schema; all members are
# nullable strings.
# NOTE(review): 'total_psp_capital_hsp_drg_amount' is spelled with 'psp' while
# its neighbours use 'pps' — confirm it matches the key emitted by the parser
# before changing it (the name is kept exactly as it was).
ts2_schema = StructType([
    StructField(ts2_field_name, StringType(), True)
    for ts2_field_name in (
        'total_drg_amount',
        'total_federal_specific_amount',
        'total_hospital_specific_amount',
        'total_disproportionate_amount',
        'total_capital_amount',
        'total_indirect_medical_education_amount',
        'total_outlier_day_count',
        'total_day_outlier_amount',
        'total_cost_outlier_amount',
        'average_drg_length_of_stay',
        'total_discharge_count',
        'total_cost_report_day_count',
        'total_covered_day_count',
        'total_noncovered_day_count',
        'total_msp_pass_through_amount',
        'average_drg_weight',
        'total_pps_capital_fsp_drg_amount',
        'total_psp_capital_hsp_drg_amount',
        'total_pps_dsh_drg_amount',
    )
])

# Struct for the top-level 'header_info' column: one string plus the two
# nested summary structs defined above. All members are nullable.
header_info_schema = StructType([
    StructField(header_field_name, header_field_type, True)
    for header_field_name, header_field_type in (
        ('assigned_num', StringType()),
        ('ts3', ts3_schema),
        ('ts2', ts2_schema),
    )
])

# ---------------- Claim Subschemas ----------------
# Struct used for the 'clp' field of each claim; all members are nullable
# strings.
clp_schema = StructType([
    StructField(clp_field_name, StringType(), True)
    for clp_field_name in (
        'patient_control_number',
        'claim_status_code',
        'total_claim_charge_amount',
        'claim_payment_amount',
        'patient_responsibility_amount',
        'claim_filing_indicator_code',
        'payer_claim_control_number',
        'facility_code_value',
        'claim_frequency_code',
        'patient_status_code',
        'drg_code',
        'drg_weight',
        'discharge_fraction',
        'yes_no_condition_or_response_code',
    )
])

# Name/identifier struct; used directly for 'first_nm1_patient' and, wrapped in
# an array, for 'claim_names' in claim_schema. All members are nullable strings.
nm1_schema = StructType([
    StructField(nm1_field_name, StringType(), True)
    for nm1_field_name in (
        'entity_identifier_code',
        'entity_type_qualifier',
        'last_name_or_organization',
        'first_name',
        'middle_name',
        'name_prefix',
        'name_suffix',
        'id_code_qualifier',
        'identifier',
        'entity_relationship_code',
    )
])

# Array of nm1 structs (default containsNull).
claim_names_schema = ArrayType(elementType=nm1_schema)

# Array of date entries (code / date / time), each a nullable string. Reused for
# both claim-level and claim-line-level dates.
dtm_schema = ArrayType(StructType([
    StructField(dtm_field_name, StringType(), True)
    for dtm_field_name in ('date_code', 'date', 'time')
]))

# Struct used for the 'mia' field of each claim; all members are nullable
# strings.
mia_schema = StructType([
    StructField(mia_field_name, StringType(), True)
    for mia_field_name in (
        'covered_days_or_visits_count',
        'pps_operation_outlier_amount',
        'lifetime_psychiatric_days_count',
        'claim_drg_amount',
        'claim_payment_remark_code',
        'claim_dsh_amount',
        'claim_msp_pass_thru_amount',
        'claim_pps_capital_amount',
        'pps_capital_fsp_drg_amount',
        'pps_capital_hsp_drg_amount',
        'pps_capital_dsh_drg_amount',
        'old_capital_amount',
        'pps_capital_ime_amount',
        'pps_oper_hsp_spec_drg_amount',
        'cost_report_day_count',
        'pps_oper_fsp_spec_drg_amount',
        'claim_pps_outlier_amount',
        'claim_indirect_teaching',
        'non_pay_prof_comp_amount',
        'inpatient_claim_payment_remark_code_1',
        'inpatient_claim_payment_remark_code_2',
        'inpatient_claim_payment_remark_code_3',
        'inpatient_claim_payment_remark_code_4',
        'pps_capital_exception_amount',
    )
])

# Struct used for the 'moa' field of each claim; all members are nullable
# strings.
moa_schema = StructType([
    StructField(moa_field_name, StringType(), True)
    for moa_field_name in (
        'reimbursement_rate',
        'claim_hcpcs_payable_amount',
        'outpatient_claim_payment_remark_code_1',
        'outpatient_claim_payment_remark_code_2',
        'outpatient_claim_payment_remark_code_3',
        'outpatient_claim_payment_remark_code_4',
        'outpatient_claim_payment_remark_code_5',
        'claim_esrd_payment_amount',
        'non_payable_professional_comp_amount',
    )
])


# Array of reference identifications (qualifier / id / description), each a
# nullable string. Shared by the claim, claim-line, payee and payer schemas.
ref_schema = ArrayType(StructType([
    StructField(ref_field_name, StringType(), True)
    for ref_field_name in ('id_qualifier_code', 'id', 'description')
]))

# Array of supplemental amounts (qualifier / amount / credit-debit flag), each a
# nullable string. Used at both claim and claim-line level.
amt_schema = ArrayType(StructType([
    StructField(amt_field_name, StringType(), True)
    for amt_field_name in ('amount_qualifier_code', 'amt', 'credit_debit_flag_code')
]))

# Array of supplemental quantities (qualifier / quantity / unit of measure),
# each a nullable string. Used at both claim and claim-line level.
qty_schema = ArrayType(StructType([
    StructField(qty_field_name, StringType(), True)
    for qty_field_name in ('quantity_qualifier_code', 'qty', 'composite_unit_of_measure')
]))

# Array of contact entries, each a struct of nullable strings with up to three
# qualifier/communication pairs. The second ArrayType argument (True) is
# containsNull. Used for claim contacts and payer contact info.
per_schema = ArrayType(
    StructType([
        StructField(per_field_name, StringType(), True)
        for per_field_name in (
            'contact_function_cd',
            'contact_name',
            'communication_number_qualifier1',
            'contact_communication1',
            'communication_number_qualifier2',
            'contact_communication2',
            'communication_number_qualifier3',
            'contact_communication3',
            'contact_inquiry_reference',
        )
    ]),
    True,
)

# Array of adjustment structs shared by claim-level and claim-line-level
# adjustments. Each struct holds 'adjustment_grp_cd' followed by six numbered
# slots; slot N contributes, in this order:
#   adjustment_reason_cd_N, adjustment_amount_N, adjustment_quantity_N
# All members are nullable strings.
service_adjustments_schema = ArrayType(StructType(
    [StructField('adjustment_grp_cd', StringType(), True)]
    + [
        StructField(f'adjustment_{adj_element}_{adj_slot}', StringType(), True)
        for adj_slot in range(1, 7)
        for adj_element in ('reason_cd', 'amount', 'quantity')
    ]
))

# ---------------- Misc ----------------
# Array of provider-level adjustment structs (top-level 'provider_adjustments'
# column). Each struct holds 'provider_identifier' and 'fiscal_period_date'
# followed by six numbered slots; slot N contributes, in this order:
#   provider_adjustment_reason_cd_N, provider_adjustment_id_N,
#   provider_adjustment_amt_N
# All members are nullable strings; the trailing True is ArrayType.containsNull.
provider_adjustments_schema = ArrayType(
    StructType(
        [
            StructField('provider_identifier', StringType(), True),
            StructField('fiscal_period_date', StringType(), True),
        ]
        + [
            StructField(f'provider_adjustment_{plb_element}_{plb_slot}', StringType(), True)
            for plb_slot in range(1, 7)
            for plb_element in ('reason_cd', 'id', 'amt')
        ]
    ),
    True,
)

# Array of claim lines. Each line is a struct made of a 'claim_line_details'
# struct (nullable strings), plus the shared date / amount / quantity /
# reference / adjustment sub-schemas and an array of remark structs.
# Every member is nullable.
claim_line_schema = ArrayType(StructType([
    StructField(line_part_name, line_part_type, True)
    for line_part_name, line_part_type in (
        ('claim_line_details', StructType([
            StructField(detail_field_name, StringType(), True)
            for detail_field_name in (
                'prcdr_cd',
                'chrg_amt',
                'paid_amt',
                'rev_cd',
                'units',
                'original_prcdr_cd',
                'original_units_of_service_count',
            )
        ])),
        ('claim_line_dates', dtm_schema),
        ('claim_line_supplemental_amount', amt_schema),
        ('claim_line_supplemental_quantity', qty_schema),
        ('claim_line_remarks', ArrayType(StructType([
            StructField(remark_field_name, StringType(), True)
            for remark_field_name in ('qualifier_cd', 'remark_cd')
        ]), True)),
        ('claim_line_related_identifications', ref_schema),
        ('claim_line_adjustments', service_adjustments_schema),
    )
]))

# One claim: a struct that stitches together the claim-level sub-schemas
# declared above. Every member is nullable.
claim_schema = StructType([
    StructField(claim_part_name, claim_part_type, True)
    for claim_part_name, claim_part_type in (
        ('clp', clp_schema),
        ('first_nm1_patient', nm1_schema),
        ('claim_names', claim_names_schema),
        ('claim_contacts', per_schema),
        ('claim_dates', dtm_schema),
        ('mia', mia_schema),
        ('moa', moa_schema),
        ('claim_adjustments', service_adjustments_schema),
        ('claim_related_identifications', ref_schema),
        ('claim_supplemental_amount', amt_schema),
        ('claim_supplemental_quantity', qty_schema),
        ('claim_lines', claim_line_schema),
    )
])

# Single-element field list (top-level 'claims' array column) so it can be
# concatenated into the final schema.
claims_schema = [
    StructField(name='claims', dataType=ArrayType(claim_schema), nullable=True),
]

# ---------------- File Info ----------------
# Single-element field list holding the nullable string column 'filename'; a
# list so it can be concatenated into the final schema.
file_info_schema = [
    StructField(name='filename', dataType=StringType(), nullable=True),
]

# ---------------- Payee ----------------
# Struct for the top-level 'payee' column. All members are nullable strings
# except 'payee_additional_identification', which is the shared ref_schema
# array and sits between the address and delivery fields.
payee_schema = StructType(
    [
        StructField(payee_field_name, StringType(), True)
        for payee_field_name in (
            'entity_identifier_code',
            'payee_name',
            'id_code_qualifier',
            'payee_identifier',
            'entity_relationship_code',
            'payee_address_line_1',
            'payee_address_line_2',
            'payee_city_name',
            'payee_state_code',
            'payee_postal_zone_or_zip_code',
            'country_code',
            'location_qualifier',
            'country_subdivision_code',
        )
    ]
    + [StructField('payee_additional_identification', ref_schema, True)]
    + [
        StructField(delivery_field_name, StringType(), True)
        for delivery_field_name in (
            'delivery_report_transmission_code',
            'delivery_name',
            'delivery_communication_number',
            'delivery_reference_identifier',
        )
    ]
)

# ---------------- Payer ----------------
# Struct for the top-level 'payer' column: nullable string identity/address
# fields followed by the shared contact (per_schema) and reference (ref_schema)
# arrays.
payer_schema = StructType(
    [
        StructField(payer_field_name, StringType(), True)
        for payer_field_name in (
            'entity_identifier_code',
            'payer_name',
            'id_code_qualifier',
            'payer_identifier',
            'entity_relationship_code',
            'payer_address_line_1',
            'payer_address_line_2',
            'payer_city_name',
            'payer_state_code',
            'payer_postal_zone_or_zip_code',
            'country_code',
            'location_qualifier',
            'country_subdivision_code',
        )
    ]
    + [
        StructField('payer_contact_info', per_schema, True),
        StructField('payer_additional_identification', ref_schema, True),
    ]
)

# ---------------- Payment ----------------
# Struct for the top-level 'payment' column. It contains three nested structs
# ('dtm', 'bpr', 'trn'), each made only of nullable string members.
# Flat field names currently disabled in this struct: transaction_handling_cd,
# monetary_amt, credit_debit_flag, payment_method_cd, payment_date,
# trace_type_cd, trace_reference_id, trace_origin_company_id.
payment_schema = StructType([
    StructField(
        segment_name,
        StructType([
            StructField(element_name, StringType(), True)
            for element_name in segment_elements
        ]),
        True,
    )
    for segment_name, segment_elements in (
        ('dtm', (
            'date_code',
            'date',
            'time',
        )),
        ('bpr', (
            'transaction_handling_code',
            'total_actual_provider_payment_amt',
            'creditor_debit_flag_code',
            'payment_method_code',
            'payment_format_code',
            'sender_dfiid_number_qualifier',
            'sender_dfi_identifier',
            'sender_account_number_qualifier',
            'sender_bank_acct_number',
            'payer_identifier',
            'payer_originating_co_supplemental_code',
            'receiver_dfiid_number_qualifier',
            'receiver_or_provider_bank_id_number',
            'receiver_acct_number_qualifier',
            'receiver_or_provider_account_number',
            'check_issue_or_eft_effective_date',
            'business_function_code',
        )),
        ('trn', (
            'trace_type_code',
            'check_or_eft_trace_number',
            'trace_payer_identifier',
            'trace_payer_originating_co_supplemental_code',
        )),
    )
])

# ---------------- Final Schema ----------------
# Top-level schema applied to each parsed JSON document. Column order: the flat
# EDI / FunctionalGroup / Transaction fields, then header_info, claims,
# filename, payee, payer, payment and provider_adjustments.
# A top-level 'date_info' struct (date_code / date / time) is currently
# disabled and therefore not part of this schema.
remittance_ref_schema = StructType([
    *edi_schema,
    *functional_group_schema,
    *transaction_schema,
    StructField('header_info', header_info_schema, True),
    *claims_schema,
    *file_info_schema,
    StructField('payee', payee_schema, True),
    StructField('payer', payer_schema, True),
    StructField('payment', payment_schema, True),
    StructField('provider_adjustments', provider_adjustments_schema, True),
])

In [0]:
# Turn the JSON payload of each successfully parsed file into typed columns,
# attach audit fields, and derive the "835UID" column.
# NOTE(review): the guard below relies on plain object truthiness. When it is
# False, v_parsed_structured_with_audit_df is never assigned in this cell, yet a
# later cell passes it to append_or_create_table — confirm which values
# v_parsing_success_df can take (None vs. an empty DataFrame).
if v_parsing_success_df:
    # Parse json_str with the reference schema and flatten the resulting struct
    # into top-level columns.
    v_parsed_structured_df = (
        v_parsing_success_df
        .select(from_json(col("json_str"), remittance_ref_schema).alias("data"))
        .select("data.*")
    )

    # "835UID" is the concatenation of four payment fields, with nulls replaced
    # by empty strings so that one missing part does not null the whole value.
    # NOTE(review): the parts are joined without a delimiter, so different
    # combinations of values can yield the same UID — confirm this is acceptable.
    v_parsed_structured_with_audit_df = add_audit_fields(
        v_parsed_structured_df, lit(v_clientid), lit("tbd")
    ).withColumn(
        "835UID",
        concat(*[
            coalesce(col(uid_part), lit(""))
            for uid_part in (
                "payment.trn.check_or_eft_trace_number",
                "payment.trn.trace_payer_identifier",
                "payment.bpr.check_issue_or_eft_effective_date",
                "payment.bpr.total_actual_provider_payment_amt",
            )
        ]),
    )

In [0]:
# Make sure the client schema exists before the client remittance table is
# written. The statement uses IF NOT EXISTS and is issued unconditionally; the
# earlier DESCRIBE SCHEMA pre-check (with an AnalysisException fallback) is
# disabled.
create_stmt = " ".join((
    f"CREATE SCHEMA IF NOT EXISTS {v_catalog}.{v_client_schema}",
    f"MANAGED LOCATION '{v_location_client}'",
))
print(f"Creating schema: {create_stmt}")
spark.sql(create_stmt)


In [0]:
# Fully qualified names of the two 'remittance' tables: the client-specific one
# and the consolidated one.
v_remittance_target_table_name = "{}.{}.remittance".format(v_catalog, v_client_schema)
v_consolidated_target_table_name = "{}.{}.remittance".format(
    v_consolidated_catalog, v_consolidated_schema_name
)

# Write the same parsed DataFrame to both targets, client table first.
# NOTE(review): the returned values are stored but not inspected in the cells
# visible here — confirm what append_or_create_table returns on failure.
v_success_remittance = append_or_create_table(
    v_parsed_structured_with_audit_df,
    v_remittance_target_table_name,
)
v_success_consolidated_remittance = append_or_create_table(
    v_parsed_structured_with_audit_df,
    v_consolidated_target_table_name,
)

In [0]:
def _with_move_destination(files_df, destination_root):
    """Return `files_df` reduced to `filename` plus a `destination` column.

    `destination` is `destination_root` followed by the last '/'-separated
    segment of `filename`.
    """
    file_basename = element_at(split(col("filename"), "/"), -1)
    return files_df.select("filename").withColumn(
        "destination", concat(lit(destination_root), file_basename)
    )


# Successfully parsed files go to the archive location; parsing failures and
# quarantined files both go to the quarantine location.
v_parsed_success_with_dest_df = _with_move_destination(
    v_parsing_success_df, v_archive_location_client
)
v_parsing_failures_with_dest_df = _with_move_destination(
    v_parsing_failures_df, v_quarantined_location_client
)
v_quarantine_files_with_dest_df = _with_move_destination(
    v_quarantine_files_df, v_quarantined_location_client
)

# One (filename, destination) row per file to relocate.
v_movement_tbl_df = (
    v_parsed_success_with_dest_df
    .union(v_parsing_failures_with_dest_df)
    .union(v_quarantine_files_with_dest_df)
)


In [0]:
# Relocate every file listed in v_movement_tbl_df to its destination. The list
# is collected to the driver and each move is attempted independently: a
# failure is printed and the loop carries on with the next file.
# NOTE(review): this cell does not look at v_success_remittance /
# v_success_consolidated_remittance before moving files — confirm that files
# should be archived regardless of the table-write outcome.
for row in v_movement_tbl_df.collect():
    source, target = row['filename'], row['destination']
    try:
        # Third positional argument is the recurse flag of dbutils.fs.mv.
        dbutils.fs.mv(source, target, True)
        print(f"[MOVED] {source} → {target}")
    except Exception as e:
        print(f"[ERROR] Failed to move {source} → {target} | Reason: {str(e)}")
